/*	$OpenBSD: uipc_socket.c,v 1.178 2017/03/03 09:41:20 mpi Exp $	*/
/*	$NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/unpcb.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <net/if.h>
#include <sys/pool.h>

#ifdef DDB
#include <machine/db_machdep.h>
#endif

void	sbsync(struct sockbuf *, struct mbuf *);

int	sosplice(struct socket *, int, off_t, struct timeval *);
void	sounsplice(struct socket *, struct socket *, int);
void	soidle(void *);
void	sotask(void *);
int	somove(struct socket *, int);

void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_solisten(struct knote *kn, long hint);

struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };


#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

struct pool socket_pool;
#ifdef SOCKET_SPLICE
struct pool sosplice_pool;
struct taskq *sosplice_taskq;
#endif

void
soinit(void)
{
	pool_init(&socket_pool, sizeof(struct socket), 0, IPL_SOFTNET, 0,
	    "sockpl", NULL);
#ifdef SOCKET_SPLICE
	pool_init(&sosplice_pool, sizeof(struct sosplice), 0, IPL_SOFTNET, 0,
	    "sosppl", NULL);
#endif
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
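
/*
 * A minimal sketch of the usual call sequence from the system-call layer
 * (compare sys_socket.c and the syscall glue); error handling is elided
 * and the variable names are illustrative only:
 *
 *	struct socket *so;
 *	socreate(AF_INET, &so, SOCK_STREAM, 0);		socket(2)
 *	sobind(so, nam, p);				bind(2)
 *	solisten(so, 5);				listen(2)
 *	...
 *	soclose(so);					close(2)
 */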
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p = curproc;		/* XXX */
	struct protosw *prp;
	struct socket *so;
	int error, s;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == NULL || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	if (suser(p, 0) == 0)
		so->so_state = SS_PRIV;
	so->so_ruid = p->p_ucred->cr_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_rgid = p->p_ucred->cr_rgid;
	so->so_egid = p->p_ucred->cr_gid;
	so->so_cpid = p->p_p->ps_pid;
	so->so_proto = prp;
	s = solock(so);
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
	    (struct mbuf *)(long)proto, NULL, p);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		sounlock(s);
		return (error);
	}
	sounlock(s);
	*aso = so;
	return (0);
}

int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	int s, error;

	s = solock(so);
	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
	sounlock(s);
	return (error);
}

int
solisten(struct socket *so, int backlog)
{
	int s, error;

	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
		return (EOPNOTSUPP);
#ifdef SOCKET_SPLICE
	if (isspliced(so) || issplicedback(so))
		return (EOPNOTSUPP);
#endif /* SOCKET_SPLICE */
	s = solock(so);
	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
	    curproc);
	if (error) {
		sounlock(s);
		return (error);
	}
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	if (backlog < sominconn)
		backlog = sominconn;
	so->so_qlimit = backlog;
	sounlock(s);
	return (0);
}

void
sofree(struct socket *so)
{
	soassertlocked(so);

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
#ifdef SOCKET_SPLICE
	if (so->so_sp) {
		if (issplicedback(so))
			sounsplice(so->so_sp->ssp_soback, so,
			    so->so_sp->ssp_soback != so);
		if (isspliced(so))
			sounsplice(so, so->so_sp->ssp_socket, 0);
		pool_put(&sosplice_pool, so->so_sp);
		so->so_sp = NULL;
	}
#endif /* SOCKET_SPLICE */
	sbrelease(&so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
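
/*
 * Userland sketch of the linger path below: a blocking close(2) that
 * waits up to 10 seconds for the disconnect to complete (fd is assumed
 * to be a connected TCP socket):
 *
 *	struct linger l = { .l_onoff = 1, .l_linger = 10 };
 *	setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
 *	close(fd);		sleeps in "netcls" until done or timeout
 */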
int
soclose(struct socket *so)
{
	struct socket *so2;
	int s, error = 0;

	s = solock(so);
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = sosleep(so, &so->so_timeo,
				    PSOCK | PCATCH, "netcls",
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, NULL,
		    NULL, NULL, curproc);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state |= SS_NOFDREF;
	sofree(so);
	sounlock(s);
	return (error);
}

int
soabort(struct socket *so)
{
	soassertlocked(so);

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL,
	    curproc);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int error = 0;

	soassertlocked(so);

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL,
		    nam, NULL, curproc);
	else
		error = ECONNABORTED;
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	int s, error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = solock(so);
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    NULL, nam, NULL, curproc);
	sounlock(s);
	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int s, error;

	s = solock(so1);
	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
	    (struct mbuf *)so2, NULL, curproc);
	sounlock(s);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int error;

	soassertlocked(so);

	if ((so->so_state & SS_ISCONNECTED) == 0)
		return (ENOTCONN);
	if (so->so_state & SS_ISDISCONNECTING)
		return (EALREADY);
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
	    NULL, curproc);
	return (error);
}

int m_getuio(struct mbuf **, int, long, struct uio *);

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
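
/*
 * Illustrative caller pattern, a sketch only (not code from this file):
 * a caller that tolerates short counts compares uio_resid before and
 * after the call:
 *
 *	resid = uio.uio_resid;
 *	error = sosend(so, NULL, &uio, NULL, NULL, 0);
 *	if ((error == EINTR || error == ERESTART) && uio.uio_resid < resid)
 *		error = 0;	part of the data was delivered
 */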
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
	long space, clen = 0;
	size_t resid;
	int error, s;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/* MSG_EOR on a SOCK_STREAM socket is invalid. */
	if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		error = EINVAL;
		goto out;
	}
	if (uio && uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgsnd++;
	if (control) {
		/*
		 * In theory clen should be unsigned (since control->m_len is).
		 * However, space must be signed, as it might be less than 0
		 * if we over-committed, and we must use a signed comparison
		 * of space and clen.
		 */
		clen = control->m_len;
		/* reserve extra space for AF_LOCAL's internalize */
		if (so->so_proto->pr_domain->dom_family == AF_LOCAL &&
		    clen >= CMSG_ALIGN(sizeof(struct cmsghdr)) &&
		    mtod(control, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
			clen = CMSG_SPACE(
			    (clen - CMSG_ALIGN(sizeof(struct cmsghdr))) *
			    (sizeof(struct fdpass) / sizeof(int)));
	}

#define	snderr(errno)	{ error = errno; sounlock(s); goto release; }

restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags), NULL)) != 0)
		goto out;
	so->so_state |= SS_ISSENDING;
	do {
		s = solock(so);
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			snderr(error);
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if (!(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    (so->so_proto->pr_domain->dom_family != AF_LOCAL &&
		    clen > so->so_snd.sb_hiwat))
			snderr(EMSGSIZE);
		if (space < clen ||
		    (space - clen < resid &&
		    (atomic || space < so->so_snd.sb_lowat))) {
			if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT))
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(so, &so->so_snd);
			so->so_state &= ~SS_ISSENDING;
			sounlock(s);
			if (error)
				goto out;
			goto restart;
		}
		sounlock(s);
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				error = m_getuio(&top, atomic,
				    space, uio);
				if (error)
					goto release;
				space -= top->m_pkthdr.len;
				resid = uio->uio_resid;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			}
			s = solock(so);
			if (resid == 0)
				so->so_state &= ~SS_ISSENDING;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, curproc);
			sounlock(s);
			clen = 0;
			control = NULL;
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	so->so_state &= ~SS_ISSENDING;
	sbunlock(&so->so_snd);
out:
	m_freem(top);
	m_freem(control);
	return (error);
}

int
m_getuio(struct mbuf **mp, int atomic, long space, struct uio *uio)
{
	struct mbuf *m, *top = NULL;
	struct mbuf **nextp = &top;
	u_long len, mlen;
	size_t resid = uio->uio_resid;
	int error;

	do {
		if (top == NULL) {
			MGETHDR(m, M_WAIT, MT_DATA);
			mlen = MHLEN;
			m->m_pkthdr.len = 0;
			m->m_pkthdr.ph_ifidx = 0;
		} else {
			MGET(m, M_WAIT, MT_DATA);
			mlen = MLEN;
		}
		/* chain mbuf together */
		*nextp = m;
		nextp = &m->m_next;

		resid = ulmin(resid, space);
		if (resid >= MINCLSIZE) {
			MCLGETI(m, M_NOWAIT, NULL, ulmin(resid, MAXMCLBYTES));
			if ((m->m_flags & M_EXT) == 0)
				MCLGETI(m, M_NOWAIT, NULL, MCLBYTES);
			if ((m->m_flags & M_EXT) == 0)
				goto nopages;
			mlen = m->m_ext.ext_size;
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && top == NULL && len < mlen - max_hdr)
				m->m_data += max_hdr;
		} else {
nopages:
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && top == NULL && len < mlen - max_hdr)
				MH_ALIGN(m, len);
		}

		error = uiomove(mtod(m, caddr_t), len, uio);
		if (error) {
			m_freem(top);
			return (error);
		}

		/* adjust counters */
		resid = uio->uio_resid;
		space -= len;
		m->m_len = len;
		top->m_pkthdr.len += len;

		/* Is there more space and more data? */
	} while (space > 0 && resid > 0);

	*mp = top;
	return (0);
}

/*
 * Following replacement or removal of the first mbuf on the first
 * mbuf chain of a socket buffer, push necessary state changes back
 * into the socket buffer so that other consumers see the values
 * consistently.  'nextrecord' is the caller's locally stored value of
 * the original value of sb->sb_mb->m_nextpkt which must be restored
 * when the lead mbuf changes.  NOTE: 'nextrecord' may be NULL.
 */
void
sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
{

	/*
	 * First, update for the new value of nextrecord.  If necessary,
	 * make it the first record.
	 */
	if (sb->sb_mb != NULL)
		sb->sb_mb->m_nextpkt = nextrecord;
	else
		sb->sb_mb = nextrecord;

	/*
	 * Now update any dependent socket buffer fields to reflect
	 * the new state.  This is an inline of SB_EMPTY_FIXUP, with
	 * the addition of a second clause that takes care of the
	 * case where sb_mb has been updated, but remains the last
	 * record.
	 */
	if (sb->sb_mb == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (sb->sb_mb->m_nextpkt == NULL)
		sb->sb_lastrecord = sb->sb_mb;
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking the network for the entire time here, we
 * release the solock() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
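
/*
 * A record in the receive buffer thus looks like this (a sketch; the
 * MT_SONAME and MT_CONTROL mbufs are only present when the protocol
 * uses them):
 *
 *	sb_mb --> [MT_SONAME] --> [MT_CONTROL] --> [MT_DATA] --> ...  (m_next)
 *	    |
 *	    | m_nextpkt
 *	    v
 *	[next record] --> ...
 */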
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp,
    socklen_t controllen)
{
	struct mbuf *m, **mp;
	struct mbuf *cm;
	u_long len, offset, moff;
	int flags, error, s, type, uio_error = 0;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	size_t resid, orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (so->so_state & SS_NBIO)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		s = solock(so);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), NULL, curproc);
		sounlock(s);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    ulmin(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		m_freem(m);
		return (error);
	}
	if (mp)
		*mp = NULL;

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags), NULL)) != 0)
		return (error);
	s = solock(so);

	m = so->so_rcv.sb_mb;
#ifdef SOCKET_SPLICE
	if (isspliced(so))
		m = NULL;
#endif /* SOCKET_SPLICE */
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 * 1. the current count is less than the low water mark,
	 * 2. MSG_WAITALL is set, and it is possible to do the entire
	 *    receive operation at once if we block (resid <= hiwat), or
	 * 3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == NULL && so->so_rcv.sb_cc)
#ifdef SOCKET_SPLICE
		    if (!isspliced(so))
#endif /* SOCKET_SPLICE */
			panic("receive 1: so %p, so_type %d, sb_cc %lu",
			    so, so->so_type, so->so_rcv.sb_cc);
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else if (so->so_rcv.sb_cc == 0)
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv);
		error = sbwait(so, &so->so_rcv);
		sounlock(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * From this point onward, we maintain 'nextrecord' as a cache of the
	 * pointer to the next record in the socket buffer.  We must keep the
	 * various socket buffer pointers and local stack versions of the
	 * pointers in sync, pushing out modifications before operations that
	 * may sleep, and re-reading them afterwards.
	 *
	 * Otherwise, we will race with the network stack appending new data
	 * or records onto the socket buffer by using inconsistent/stale
	 * versions of the field, possibly resulting in socket buffer
	 * corruption.
	 */
	if (uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgrcv++;
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				so->so_rcv.sb_mb = m_free(m);
				m = so->so_rcv.sb_mb;
			}
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			so->so_rcv.sb_mb = m->m_next;
			m->m_nextpkt = m->m_next = NULL;
			cm = m;
			m = so->so_rcv.sb_mb;
			sbsync(&so->so_rcv, nextrecord);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(cm, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS) {
					error =
					    (*pr->pr_domain->dom_externalize)
					    (cm, controllen, flags);
				}
				*controlp = cm;
			} else {
				/*
				 * Dispose of any SCM_RIGHTS message that went
				 * through the read path rather than recv.
				 */
				if (pr->pr_domain->dom_dispose &&
				    mtod(cm, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					pr->pr_domain->dom_dispose(cm);
				m_free(cm);
			}
		}
		if (m != NULL)
			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
		else
			nextrecord = so->so_rcv.sb_mb;
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}

	/* If m is non-NULL, we have some data to read. */
	if (m) {
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == NULL && uio_error == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			resid = uio->uio_resid;
			sounlock(s);
			uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
			s = solock(so);
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = NULL;
				} else {
					so->so_rcv.sb_mb = m_free(m);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
				so->so_rcv.sb_datacc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			error = sbwait(so, &so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				sounlock(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, NULL,
			    (struct mbuf *)(long)flags, NULL, curproc);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		sounlock(s);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	sounlock(s);
	return (error);
}

int
soshutdown(struct socket *so, int how)
{
	struct protosw *pr = so->so_proto;
	int s, error = 0;

	s = solock(so);
	switch (how) {
	case SHUT_RD:
	case SHUT_RDWR:
		sorflush(so);
		if (how == SHUT_RD)
			break;
		/* FALLTHROUGH */
	case SHUT_WR:
		error = (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL,
		    curproc);
		break;
	default:
		error = EINVAL;
		break;
	}
	sounlock(s);

	return (error);
}

void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	sblock(sb, M_WAITOK,
	    (pr->pr_domain->dom_family != PF_LOCAL) ? &netlock : NULL);
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	memset(sb, 0, sizeof (*sb));
	/* XXX - the memset stomps all over so_rcv */
	if (asb.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

#ifdef SOCKET_SPLICE

#define so_splicelen	so_sp->ssp_len
#define so_splicemax	so_sp->ssp_max
#define so_idletv	so_sp->ssp_idletv
#define so_idleto	so_sp->ssp_idleto
#define so_splicetask	so_sp->ssp_task
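
/*
 * Userland view of the splicing code below, a minimal sketch (the file
 * descriptor names are illustrative only): splice two connected TCP
 * sockets with a 1 MB transfer limit and a 5 second idle timeout, using
 * the same struct splice that sosetopt() unpacks for SO_SPLICE:
 *
 *	struct splice sp = { .sp_fd = drain_fd, .sp_max = 1024 * 1024,
 *	    .sp_idle = { .tv_sec = 5, .tv_usec = 0 } };
 *	setsockopt(source_fd, SOL_SOCKET, SO_SPLICE, &sp, sizeof(sp));
 */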
int
sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
{
	struct file *fp;
	struct socket *sosp;
	int s, error = 0;

	if (sosplice_taskq == NULL)
		sosplice_taskq = taskq_create("sosplice", 1, IPL_SOFTNET, 0);
	if (sosplice_taskq == NULL)
		return (ENOMEM);

	if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
		return (EPROTONOSUPPORT);
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (ENOTCONN);
	if (so->so_sp == NULL)
		so->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);

	/* If no fd is given, unsplice by removing existing link. */
	if (fd < 0) {
		/* Lock receive buffer. */
		if ((error = sblock(&so->so_rcv,
		    (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK,
		    NULL)) != 0)
			return (error);
		s = solock(so);
		if (so->so_sp->ssp_socket)
			sounsplice(so, so->so_sp->ssp_socket, 1);
		sounlock(s);
		sbunlock(&so->so_rcv);
		return (0);
	}

	if (max && max < 0)
		return (EINVAL);

	if (tv && (tv->tv_sec < 0 || tv->tv_usec < 0))
		return (EINVAL);

	/* Find sosp, the drain socket where data will be spliced into. */
	if ((error = getsock(curproc, fd, &fp)) != 0)
		return (error);
	sosp = fp->f_data;
	if (sosp->so_sp == NULL)
		sosp->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);

	/* Lock both receive and send buffer. */
	if ((error = sblock(&so->so_rcv,
	    (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK, NULL)) != 0) {
		FRELE(fp, curproc);
		return (error);
	}
	if ((error = sblock(&sosp->so_snd, M_WAITOK, NULL)) != 0) {
		sbunlock(&so->so_rcv);
		FRELE(fp, curproc);
		return (error);
	}
	s = solock(so);

	if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
		error = EBUSY;
		goto release;
	}
	if (sosp->so_proto->pr_usrreq != so->so_proto->pr_usrreq) {
		error = EPROTONOSUPPORT;
		goto release;
	}
	if (sosp->so_options & SO_ACCEPTCONN) {
		error = EOPNOTSUPP;
		goto release;
	}
	if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) {
		error = ENOTCONN;
		goto release;
	}

	/* Splice so and sosp together. */
	so->so_sp->ssp_socket = sosp;
	sosp->so_sp->ssp_soback = so;
	so->so_splicelen = 0;
	so->so_splicemax = max;
	if (tv)
		so->so_idletv = *tv;
	else
		timerclear(&so->so_idletv);
	timeout_set(&so->so_idleto, soidle, so);
	task_set(&so->so_splicetask, sotask, so);

	/*
	 * To prevent softnet interrupt from calling somove() while
	 * we sleep, the socket buffers are not marked as spliced yet.
	 */
	if (somove(so, M_WAIT)) {
		so->so_rcv.sb_flagsintr |= SB_SPLICE;
		sosp->so_snd.sb_flagsintr |= SB_SPLICE;
	}

release:
	sounlock(s);
	sbunlock(&sosp->so_snd);
	sbunlock(&so->so_rcv);
	FRELE(fp, curproc);
	return (error);
}

void
sounsplice(struct socket *so, struct socket *sosp, int wakeup)
{
	soassertlocked(so);

	task_del(sosplice_taskq, &so->so_splicetask);
	timeout_del(&so->so_idleto);
	sosp->so_snd.sb_flagsintr &= ~SB_SPLICE;
	so->so_rcv.sb_flagsintr &= ~SB_SPLICE;
	so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
	if (wakeup && soreadable(so))
		sorwakeup(so);
}

void
soidle(void *arg)
{
	struct socket *so = arg;
	int s;

	s = solock(so);
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		so->so_error = ETIMEDOUT;
		sounsplice(so, so->so_sp->ssp_socket, 1);
	}
	sounlock(s);
}

void
sotask(void *arg)
{
	struct socket *so = arg;
	int s;

	s = solock(so);
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		/*
		 * We may not sleep here as sofree() and unsplice() may be
		 * called from softnet interrupt context.  This would remove
		 * the socket during somove().
		 */
		somove(so, M_DONTWAIT);
	}
	sounlock(s);

	/* Avoid user land starvation. */
	yield();
}

/*
 * Move data from the receive buffer of the spliced source socket to the
 * send buffer of the drain socket.  Try to move as much as possible in
 * one big chunk.  It is a TCP-only implementation.
 * A return value of 0 means splicing has finished; 1 means it should
 * continue.
 */
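
/*
 * Callers: sosplice() invokes somove() once with M_WAIT to start the
 * transfer; sorwakeup() and sotask() later drive it with M_DONTWAIT,
 * since they must not sleep.
 */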
int
somove(struct socket *so, int wait)
{
	struct socket *sosp = so->so_sp->ssp_socket;
	struct mbuf *m, **mp, *nextrecord;
	u_long len, off, oobmark;
	long space;
	int error = 0, maxreached = 0;
	short state;

	soassertlocked(so);

nextpkt:
	if (so->so_error) {
		error = so->so_error;
		goto release;
	}
	if (sosp->so_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto release;
	}
	if (sosp->so_error && sosp->so_error != ETIMEDOUT &&
	    sosp->so_error != EFBIG && sosp->so_error != ELOOP) {
		error = sosp->so_error;
		goto release;
	}
	if ((sosp->so_state & SS_ISCONNECTED) == 0)
		goto release;

	/* Calculate how many bytes can be copied now. */
	len = so->so_rcv.sb_datacc;
	if (so->so_splicemax) {
		KASSERT(so->so_splicelen < so->so_splicemax);
		if (so->so_splicemax <= so->so_splicelen + len) {
			len = so->so_splicemax - so->so_splicelen;
			maxreached = 1;
		}
	}
	space = sbspace(&sosp->so_snd);
	if (so->so_oobmark && so->so_oobmark < len &&
	    so->so_oobmark < space + 1024)
		space += 1024;
	if (space <= 0) {
		maxreached = 0;
		goto release;
	}
	if (space < len) {
		maxreached = 0;
		if (space < sosp->so_snd.sb_lowat)
			goto release;
		len = space;
	}
	sosp->so_state |= SS_ISSENDING;

	SBLASTRECORDCHK(&so->so_rcv, "somove 1");
	SBLASTMBUFCHK(&so->so_rcv, "somove 1");
	m = so->so_rcv.sb_mb;
	if (m == NULL)
		goto release;
	nextrecord = m->m_nextpkt;

	/* Drop address and control information not used with splicing. */
	if (so->so_proto->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("somove soname: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
#endif
		m = m->m_next;
	}
	while (m && m->m_type == MT_CONTROL)
		m = m->m_next;
	if (m == NULL) {
		sbdroprecord(&so->so_rcv);
		if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb)
			(so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL,
			    NULL, NULL, NULL);
		goto nextpkt;
	}

	/*
	 * By splicing sockets connected to localhost, userland might create a
	 * loop.  Dissolve splicing with error if loop is detected by counter.
	 */
	if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) {
		error = ELOOP;
		goto release;
	}

	if (so->so_proto->pr_flags & PR_ATOMIC) {
		if ((m->m_flags & M_PKTHDR) == 0)
			panic("somove !PKTHDR: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
		if (sosp->so_snd.sb_hiwat < m->m_pkthdr.len) {
			error = EMSGSIZE;
			goto release;
		}
		if (len < m->m_pkthdr.len)
			goto release;
		if (m->m_pkthdr.len < len) {
			maxreached = 0;
			len = m->m_pkthdr.len;
		}
		/*
		 * Throw away the name mbuf after it has been assured
		 * that the whole first record can be processed.
		 */
		m = so->so_rcv.sb_mb;
		sbfree(&so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		sbsync(&so->so_rcv, nextrecord);
	}
	/*
	 * Throw away the control mbufs after it has been assured
	 * that the whole first record can be processed.
	 */
	m = so->so_rcv.sb_mb;
	while (m && m->m_type == MT_CONTROL) {
		sbfree(&so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		m = so->so_rcv.sb_mb;
		sbsync(&so->so_rcv, nextrecord);
	}

	SBLASTRECORDCHK(&so->so_rcv, "somove 2");
	SBLASTMBUFCHK(&so->so_rcv, "somove 2");

	/* Take at most len mbufs out of receive buffer. */
	for (off = 0, mp = &m; off <= len && *mp;
	    off += (*mp)->m_len, mp = &(*mp)->m_next) {
		u_long size = len - off;

#ifdef DIAGNOSTIC
		if ((*mp)->m_type != MT_DATA && (*mp)->m_type != MT_HEADER)
			panic("somove type: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, *mp, (*mp)->m_type);
#endif
		if ((*mp)->m_len > size) {
			/*
			 * Move only a partial mbuf at maximum splice length or
			 * if the drain buffer is too small for this large mbuf.
			 */
			if (!maxreached && sosp->so_snd.sb_datacc > 0) {
				len -= size;
				break;
			}
			*mp = m_copym(so->so_rcv.sb_mb, 0, size, wait);
			if (*mp == NULL) {
				len -= size;
				break;
			}
			so->so_rcv.sb_mb->m_data += size;
			so->so_rcv.sb_mb->m_len -= size;
			so->so_rcv.sb_cc -= size;
			so->so_rcv.sb_datacc -= size;
		} else {
			*mp = so->so_rcv.sb_mb;
			sbfree(&so->so_rcv, *mp);
			so->so_rcv.sb_mb = (*mp)->m_next;
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	*mp = NULL;

	SBLASTRECORDCHK(&so->so_rcv, "somove 3");
	SBLASTMBUFCHK(&so->so_rcv, "somove 3");
	SBCHECK(&so->so_rcv);
	if (m == NULL)
		goto release;
	m->m_nextpkt = NULL;
	if (m->m_flags & M_PKTHDR) {
		m_resethdr(m);
		m->m_pkthdr.len = len;
	}

	/* Send window update to source peer as receive buffer has changed. */
	if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb)
		(so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL,
		    NULL, NULL, NULL);

	/* Receive buffer did shrink by len bytes, adjust oob. */
	state = so->so_state;
	so->so_state &= ~SS_RCVATMARK;
	oobmark = so->so_oobmark;
	so->so_oobmark = oobmark > len ? oobmark - len : 0;
	if (oobmark) {
		if (oobmark == len)
			so->so_state |= SS_RCVATMARK;
		if (oobmark >= len)
			oobmark = 0;
	}

	/*
	 * Handle oob data.  If any malloc fails, ignore error.
	 * TCP urgent data is not very reliable anyway.
	 */
	while (((state & SS_RCVATMARK) || oobmark) &&
	    (so->so_options & SO_OOBINLINE)) {
		struct mbuf *o = NULL;

		if (state & SS_RCVATMARK) {
			o = m_get(wait, MT_DATA);
			state &= ~SS_RCVATMARK;
		} else if (oobmark) {
			o = m_split(m, oobmark, wait);
			if (o) {
				error = (*sosp->so_proto->pr_usrreq)(sosp,
				    PRU_SEND, m, NULL, NULL, NULL);
				if (error) {
					if (sosp->so_state & SS_CANTSENDMORE)
						error = EPIPE;
					m_freem(o);
					goto release;
				}
				len -= oobmark;
				so->so_splicelen += oobmark;
				m = o;
				o = m_get(wait, MT_DATA);
			}
			oobmark = 0;
		}
		if (o) {
			o->m_len = 1;
			*mtod(o, caddr_t) = *mtod(m, caddr_t);
			error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SENDOOB,
			    o, NULL, NULL, NULL);
			if (error) {
				if (sosp->so_state & SS_CANTSENDMORE)
					error = EPIPE;
				m_freem(m);
				goto release;
			}
			len -= 1;
			so->so_splicelen += 1;
			if (oobmark) {
				oobmark -= 1;
				if (oobmark == 0)
					state |= SS_RCVATMARK;
			}
			m_adj(m, 1);
		}
	}

	/* Append all remaining data to drain socket. */
	if (so->so_rcv.sb_cc == 0 || maxreached)
		sosp->so_state &= ~SS_ISSENDING;
	error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SEND, m, NULL, NULL,
	    NULL);
	if (error) {
		if (sosp->so_state & SS_CANTSENDMORE)
			error = EPIPE;
		goto release;
	}
	so->so_splicelen += len;

	/* Move several packets if possible. */
	if (!maxreached && nextrecord)
		goto nextpkt;

release:
	sosp->so_state &= ~SS_ISSENDING;
	if (!error && maxreached && so->so_splicemax == so->so_splicelen)
		error = EFBIG;
	if (error)
		so->so_error = error;
	if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) ||
	    (sosp->so_state & SS_CANTSENDMORE) || maxreached || error) {
		sounsplice(so, sosp, 1);
		return (0);
	}
	if (timerisset(&so->so_idletv))
		timeout_add_tv(&so->so_idleto, &so->so_idletv);
	return (1);
}

#endif /* SOCKET_SPLICE */

void
sorwakeup(struct socket *so)
{
	soassertlocked(so);

#ifdef SOCKET_SPLICE
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		/*
		 * TCP has a sendbuffer that can handle multiple packets
		 * at once.  So queue the stream a bit to accumulate data.
		 * The sosplice thread will call somove() later and send
		 * the packets calling tcp_output() only once.
		 * In the UDP case, send out the packets immediately.
		 * Using a thread would make things slower.
		 */
		if (so->so_proto->pr_flags & PR_WANTRCVD)
			task_add(sosplice_taskq, &so->so_splicetask);
		else
			somove(so, M_DONTWAIT);
	}
	if (isspliced(so))
		return;
#endif
	sowakeup(so, &so->so_rcv);
	if (so->so_upcall)
		(*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
}

void
sowwakeup(struct socket *so)
{
	soassertlocked(so);

#ifdef SOCKET_SPLICE
	if (so->so_snd.sb_flagsintr & SB_SPLICE)
		task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask);
#endif
	sowakeup(so, &so->so_snd);
}

int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
{
	int s, error = 0;
	struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			s = solock(so);
			error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
			    level, optname, m0);
			sounlock(s);
			return (error);
		}
		error = ENOPROTOOPT;
	} else {
		switch (optname) {
		case SO_BINDANY:
			if ((error = suser(curproc, 0)) != 0)	/* XXX */
				goto bad;
			break;
		}

		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* FALLTHROUGH */

		case SO_BINDANY:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_DONTROUTE:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				error = EOPNOTSUPP;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			cnt = *mtod(m, int *);
			if ((long)cnt <= 0)
				cnt = 1;
			switch (optname) {

			case SO_SNDBUF:
				if (so->so_state & SS_CANTSENDMORE) {
					error = EINVAL;
					goto bad;
				}
				if (sbcheckreserve(cnt, so->so_snd.sb_wat) ||
				    sbreserve(&so->so_snd, cnt)) {
					error = ENOBUFS;
					goto bad;
				}
				so->so_snd.sb_wat = cnt;
				break;

			case SO_RCVBUF:
				if (so->so_state & SS_CANTRCVMORE) {
					error = EINVAL;
					goto bad;
				}
				if (sbcheckreserve(cnt, so->so_rcv.sb_wat) ||
				    sbreserve(&so->so_rcv, cnt)) {
					error = ENOBUFS;
					goto bad;
				}
				so->so_rcv.sb_wat = cnt;
				break;

			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (cnt > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : cnt;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (cnt > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : cnt;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval tv;
			int val;

			if (m == NULL || m->m_len < sizeof (tv)) {
				error = EINVAL;
				goto bad;
			}
			memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
			val = tvtohz(&tv);
			if (val > USHRT_MAX) {
				error = EDOM;
				goto bad;
			}

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto && so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				struct domain *dom = so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				s = solock(so);
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_SETOPT, so, level, optname, m0);
				sounlock(s);
				return (error);
			}
			error = ENOPROTOOPT;
			break;

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
			if (m == NULL) {
				error = sosplice(so, -1, 0, NULL);
			} else if (m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			} else if (m->m_len < sizeof(struct splice)) {
				error = sosplice(so, *mtod(m, int *), 0, NULL);
			} else {
				error = sosplice(so,
				    mtod(m, struct splice *)->sp_fd,
				    mtod(m, struct splice *)->sp_max,
				    &mtod(m, struct splice *)->sp_idle);
			}
			break;
#endif /* SOCKET_SPLICE */

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			s = solock(so);
			(*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
			    level, optname, m0);
			sounlock(s);
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}
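
/*
 * Options travel in mbufs here rather than in flat buffers.  A sketch of
 * how an in-kernel caller might set SO_KEEPALIVE (hypothetical caller,
 * error handling elided; sosetopt() consumes the mbuf):
 *
 *	struct mbuf *m = m_get(M_WAIT, MT_SOOPTS);
 *	m->m_len = sizeof(int);
 *	*mtod(m, int *) = 1;
 *	error = sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
 */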
int
sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
{
	int s, error = 0;
	struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			m = m_get(M_WAIT, MT_SOOPTS);
			m->m_len = 0;

			s = solock(so);
			error = (*so->so_proto->pr_ctloutput)(PRCO_GETOPT, so,
			    level, optname, m);
			sounlock(s);
			if (error) {
				m_free(m);
				return (error);
			}
			*mp = m;
			return (0);
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_BINDANY:
		case SO_USELOOPBACK:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_DONTROUTE:
			*mtod(m, int *) = 0;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval tv;
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			memset(&tv, 0, sizeof(tv));
			tv.tv_sec = val / hz;
			tv.tv_usec = (val % hz) * tick;
			memcpy(mtod(m, struct timeval *), &tv, sizeof tv);
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto && so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				struct domain *dom = so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				s = solock(so);
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_GETOPT, so, level, optname, m);
				sounlock(s);
				if (error) {
					(void)m_free(m);
					return (error);
				}
				break;
			}
			(void)m_free(m);
			return (ENOPROTOOPT);

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
		    {
			off_t len;
			int s = splsoftnet();

			m->m_len = sizeof(off_t);
			len = so->so_sp ? so->so_sp->ssp_len : 0;
			memcpy(mtod(m, off_t *), &len, sizeof(off_t));
			splx(s);
			break;
		    }
#endif /* SOCKET_SPLICE */

		case SO_PEERCRED:
			if (so->so_proto->pr_protocol == AF_UNIX) {
				struct unpcb *unp = sotounpcb(so);

				if (unp->unp_flags & UNP_FEIDS) {
					m->m_len = sizeof(unp->unp_connid);
					memcpy(mtod(m, caddr_t),
					    &(unp->unp_connid), m->m_len);
					break;
				}
				(void)m_free(m);
				return (ENOTCONN);
			}
			(void)m_free(m);
			return (EOPNOTSUPP);

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

void
sohasoutofband(struct socket *so)
{
	csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid);
	selwakeup(&so->so_rcv.sb_sel);
}

int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	struct sockbuf *sb;

	KERNEL_ASSERT_LOCKED();

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (EINVAL);
	}

	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;

	return (0);
}

void
filt_sordetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	KERNEL_ASSERT_LOCKED();

	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
}

int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;

	kn->kn_data = so->so_rcv.sb_cc;
#ifdef SOCKET_SPLICE
	if (isspliced(so))
		return (0);
#endif /* SOCKET_SPLICE */
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	KERNEL_ASSERT_LOCKED();

	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
}

int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (so->so_qlen != 0);
}
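
/*
 * Userland view of the filters above, a minimal sketch: waiting for a
 * listening socket to become ready for accept(2) via kqueue(2).  An
 * EVFILT_READ filter on a socket with SO_ACCEPTCONN set selects
 * solisten_filtops, so kev.data reports the accept queue length:
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *	EV_SET(&kev, listen_fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, &kev, 1, NULL);
 */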

#ifdef DDB
void
sobuf_print(struct sockbuf *,
    int (*)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))));

void
sobuf_print(struct sockbuf *sb,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	(*pr)("\tsb_cc: %lu\n", sb->sb_cc);
	(*pr)("\tsb_datacc: %lu\n", sb->sb_datacc);
	(*pr)("\tsb_hiwat: %lu\n", sb->sb_hiwat);
	(*pr)("\tsb_wat: %lu\n", sb->sb_wat);
	(*pr)("\tsb_mbcnt: %lu\n", sb->sb_mbcnt);
	(*pr)("\tsb_mbmax: %lu\n", sb->sb_mbmax);
	(*pr)("\tsb_lowat: %ld\n", sb->sb_lowat);
	(*pr)("\tsb_mb: %p\n", sb->sb_mb);
	(*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
	(*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
	(*pr)("\tsb_sel: ...\n");
	(*pr)("\tsb_flagsintr: %d\n", sb->sb_flagsintr);
	(*pr)("\tsb_flags: %i\n", sb->sb_flags);
	(*pr)("\tsb_timeo: %i\n", sb->sb_timeo);
}

void
so_print(void *v,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct socket *so = v;

	(*pr)("socket %p\n", so);
	(*pr)("so_type: %i\n", so->so_type);
	(*pr)("so_options: 0x%04x\n", so->so_options); /* %b */
	(*pr)("so_linger: %i\n", so->so_linger);
	(*pr)("so_state: %i\n", so->so_state);
	(*pr)("so_pcb: %p\n", so->so_pcb);
	(*pr)("so_proto: %p\n", so->so_proto);

	(*pr)("so_head: %p\n", so->so_head);
	(*pr)("so_onq: %p\n", so->so_onq);
	(*pr)("so_q0: @%p first: %p\n", &so->so_q0, TAILQ_FIRST(&so->so_q0));
	(*pr)("so_q: @%p first: %p\n", &so->so_q, TAILQ_FIRST(&so->so_q));
	(*pr)("so_eq: next: %p\n", TAILQ_NEXT(so, so_qe));
	(*pr)("so_q0len: %i\n", so->so_q0len);
	(*pr)("so_qlen: %i\n", so->so_qlen);
	(*pr)("so_qlimit: %i\n", so->so_qlimit);
	(*pr)("so_timeo: %i\n", so->so_timeo);
	(*pr)("so_pgid: %i\n", so->so_pgid);
	(*pr)("so_siguid: %i\n", so->so_siguid);
	(*pr)("so_sigeuid: %i\n", so->so_sigeuid);
	(*pr)("so_oobmark: %lu\n", so->so_oobmark);

	(*pr)("so_sp: %p\n", so->so_sp);
	if (so->so_sp != NULL) {
		(*pr)("\tssp_socket: %p\n", so->so_sp->ssp_socket);
		(*pr)("\tssp_soback: %p\n", so->so_sp->ssp_soback);
		(*pr)("\tssp_len: %lld\n",
		    (unsigned long long)so->so_sp->ssp_len);
		(*pr)("\tssp_max: %lld\n",
		    (unsigned long long)so->so_sp->ssp_max);
		(*pr)("\tssp_idletv: %lld %ld\n", so->so_sp->ssp_idletv.tv_sec,
		    so->so_sp->ssp_idletv.tv_usec);
		(*pr)("\tssp_idleto: %spending (@%i)\n",
		    timeout_pending(&so->so_sp->ssp_idleto) ? "" : "not ",
		    so->so_sp->ssp_idleto.to_time);
	}

	(*pr)("so_rcv:\n");
	sobuf_print(&so->so_rcv, pr);
	(*pr)("so_snd:\n");
	sobuf_print(&so->so_snd, pr);

	(*pr)("so_upcall: %p so_upcallarg: %p\n",
	    so->so_upcall, so->so_upcallarg);

	(*pr)("so_euid: %d so_ruid: %d\n", so->so_euid, so->so_ruid);
	(*pr)("so_egid: %d so_rgid: %d\n", so->so_egid, so->so_rgid);
	(*pr)("so_cpid: %d\n", so->so_cpid);
}
#endif