/*	$OpenBSD: uipc_socket.c,v 1.202 2017/08/22 09:13:36 mpi Exp $	*/
/*	$NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/unpcb.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <net/if.h>
#include <sys/pool.h>

#ifdef DDB
#include <machine/db_machdep.h>
#endif

void	sbsync(struct sockbuf *, struct mbuf *);

int	sosplice(struct socket *, int, off_t, struct timeval *);
void	sounsplice(struct socket *, struct socket *, int);
void	soidle(void *);
void	sotask(void *);
int	somove(struct socket *, int);

void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_solisten(struct knote *kn, long hint);

struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };


#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

struct pool socket_pool;
#ifdef SOCKET_SPLICE
struct pool sosplice_pool;
struct taskq *sosplice_taskq;
#endif

void
soinit(void)
{
	pool_init(&socket_pool, sizeof(struct socket), 0, IPL_SOFTNET, 0,
	    "sockpl", NULL);
#ifdef SOCKET_SPLICE
	pool_init(&sosplice_pool, sizeof(struct sosplice), 0, IPL_SOFTNET, 0,
	    "sosppl", NULL);
#endif
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
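 *
 * As an illustrative sketch only (not a path taken verbatim in this
 * file), a stream server created from within the kernel would step
 * through these routines roughly as:
 *
 *	socreate(AF_INET, &so, SOCK_STREAM, 0);
 *	sobind(so, nam, p);
 *	solisten(so, 5);
 *	...
 *	soaccept(so2, nam);
 *
 * with each call switching out through so->so_proto->pr_usrreq.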
 */
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p = curproc;		/* XXX */
	struct protosw *prp;
	struct socket *so;
	int error, s;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == NULL || prp->pr_attach == NULL)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	if (suser(p, 0) == 0)
		so->so_state = SS_PRIV;
	so->so_ruid = p->p_ucred->cr_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_rgid = p->p_ucred->cr_rgid;
	so->so_egid = p->p_ucred->cr_gid;
	so->so_cpid = p->p_p->ps_pid;
	so->so_proto = prp;

	s = solock(so);
	error = (*prp->pr_attach)(so, proto);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		sounlock(s);
		return (error);
	}
	sounlock(s);
	*aso = so;
	return (0);
}

int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	int error;

	soassertlocked(so);

	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
	return (error);
}

int
solisten(struct socket *so, int backlog)
{
	int s, error;

	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
		return (EOPNOTSUPP);
#ifdef SOCKET_SPLICE
	if (isspliced(so) || issplicedback(so))
		return (EOPNOTSUPP);
#endif /* SOCKET_SPLICE */
	s = solock(so);
	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
	    curproc);
	if (error) {
		sounlock(s);
		return (error);
	}
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	if (backlog < sominconn)
		backlog = sominconn;
	so->so_qlimit = backlog;
	sounlock(s);
	return (0);
}

void
sofree(struct socket *so)
{
	soassertlocked(so);

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
#ifdef SOCKET_SPLICE
	if (so->so_sp) {
		if (issplicedback(so))
			sounsplice(so->so_sp->ssp_soback, so,
			    so->so_sp->ssp_soback != so);
		if (isspliced(so))
			sounsplice(so, so->so_sp->ssp_socket, 0);
		pool_put(&sosplice_pool, so->so_sp);
		so->so_sp = NULL;
	}
#endif /* SOCKET_SPLICE */
	sbrelease(so, &so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
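 *
 * For example (sketch): with SO_LINGER set and a nonzero so_linger,
 * a blocking close on a connected socket sleeps in sosleep() below
 * for up to so_linger * hz ticks, waiting for SS_ISCONNECTED to
 * clear, before the PCB is detached with PRU_DETACH.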
 */
int
soclose(struct socket *so)
{
	struct socket *so2;
	int s, error = 0;

	s = solock(so);
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = sosleep(so, &so->so_timeo,
				    PSOCK | PCATCH, "netcls",
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, NULL,
		    NULL, NULL, curproc);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state |= SS_NOFDREF;
	sofree(so);
	sounlock(s);
	return (error);
}

int
soabort(struct socket *so)
{
	soassertlocked(so);

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL,
	    curproc);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int error = 0;

	soassertlocked(so);

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL,
		    nam, NULL, curproc);
	else
		error = ECONNABORTED;
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	int error;

	soassertlocked(so);

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    NULL, nam, NULL, curproc);
	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int error;

	soassertlocked(so1);
	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
	    (struct mbuf *)so2, NULL, curproc);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int error;

	soassertlocked(so);

	if ((so->so_state & SS_ISCONNECTED) == 0)
		return (ENOTCONN);
	if (so->so_state & SS_ISDISCONNECTING)
		return (EALREADY);
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
	    NULL, curproc);
	return (error);
}

int m_getuio(struct mbuf **, int, long, struct uio *);

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
	long space, clen = 0;
	size_t resid;
	int error, s;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/* MSG_EOR on a SOCK_STREAM socket is invalid. */
	if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		m_freem(top);
		m_freem(control);
		return (EINVAL);
	}
	if (uio && uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgsnd++;
	if (control) {
		/*
		 * In theory clen should be unsigned (since control->m_len is).
		 * However, space must be signed, as it might be less than 0
		 * if we over-committed, and we must use a signed comparison
		 * of space and clen.
		 */
		clen = control->m_len;
		/* reserve extra space for AF_LOCAL's internalize */
		if (so->so_proto->pr_domain->dom_family == AF_LOCAL &&
		    clen >= CMSG_ALIGN(sizeof(struct cmsghdr)) &&
		    mtod(control, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
			clen = CMSG_SPACE(
			    (clen - CMSG_ALIGN(sizeof(struct cmsghdr))) *
			    (sizeof(struct fdpass) / sizeof(int)));
	}

#define	snderr(errno)	{ error = errno; goto release; }

	s = solock(so);
restart:
	if ((error = sblock(so, &so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	so->so_state |= SS_ISSENDING;
	do {
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			snderr(error);
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if (!(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(so, &so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    (so->so_proto->pr_domain->dom_family != AF_LOCAL &&
		    clen > so->so_snd.sb_hiwat))
			snderr(EMSGSIZE);
		if (space < clen ||
		    (space - clen < resid &&
		    (atomic || space < so->so_snd.sb_lowat))) {
			if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT))
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(so, &so->so_snd);
			so->so_state &= ~SS_ISSENDING;
			if (error)
				goto out;
			goto restart;
		}
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				sounlock(s);
				error = m_getuio(&top, atomic, space, uio);
				s = solock(so);
				if (error)
					goto release;
				space -= top->m_pkthdr.len;
				resid = uio->uio_resid;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			}
			if (resid == 0)
				so->so_state &= ~SS_ISSENDING;
			if (top && so->so_options & SO_ZEROIZE)
				top->m_flags |= M_ZEROIZE;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, curproc);
			clen = 0;
			control = NULL;
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	so->so_state &= ~SS_ISSENDING;
	sbunlock(&so->so_snd);
out:
	sounlock(s);
	m_freem(top);
	m_freem(control);
	return (error);
}

int
m_getuio(struct mbuf **mp, int atomic, long space, struct uio *uio)
{
	struct mbuf *m, *top = NULL;
	struct mbuf **nextp = &top;
	u_long len, mlen;
	size_t resid = uio->uio_resid;
	int error;

	do {
		if (top == NULL) {
			MGETHDR(m, M_WAIT, MT_DATA);
			mlen = MHLEN;
			m->m_pkthdr.len = 0;
			m->m_pkthdr.ph_ifidx = 0;
		} else {
			MGET(m, M_WAIT, MT_DATA);
			mlen = MLEN;
		}
		/* chain mbufs together */
		*nextp = m;
		nextp = &m->m_next;

		resid = ulmin(resid, space);
		if (resid >= MINCLSIZE) {
			MCLGETI(m, M_NOWAIT, NULL, ulmin(resid, MAXMCLBYTES));
			if ((m->m_flags & M_EXT) == 0)
				MCLGETI(m, M_NOWAIT, NULL, MCLBYTES);
			if ((m->m_flags & M_EXT) == 0)
				goto nopages;
			mlen = m->m_ext.ext_size;
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && top == NULL && len < mlen - max_hdr)
				m->m_data += max_hdr;
		} else {
nopages:
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && top == NULL && len < mlen - max_hdr)
				MH_ALIGN(m, len);
		}

		error = uiomove(mtod(m, caddr_t), len, uio);
		if (error) {
			m_freem(top);
			return (error);
		}

		/* adjust counters */
		resid = uio->uio_resid;
		space -= len;
		m->m_len = len;
		top->m_pkthdr.len += len;

		/* Is there more space and more data? */
	} while (space > 0 && resid > 0);

	*mp = top;
	return 0;
}

/*
 * Following replacement or removal of the first mbuf on the first
 * mbuf chain of a socket buffer, push necessary state changes back
 * into the socket buffer so that other consumers see the values
 * consistently.  'nextrecord' is the caller's locally stored value of
 * the original value of sb->sb_mb->m_nextpkt which must be restored
 * when the lead mbuf changes.  NOTE: 'nextrecord' may be NULL.
 */
void
sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
{

	/*
	 * First, update for the new value of nextrecord.  If necessary,
	 * make it the first record.
	 */
	if (sb->sb_mb != NULL)
		sb->sb_mb->m_nextpkt = nextrecord;
	else
		sb->sb_mb = nextrecord;

	/*
	 * Now update any dependent socket buffer fields to reflect
	 * the new state.  This is an inline of SB_EMPTY_FIXUP, with
	 * the addition of a second clause that takes care of the
	 * case where sb_mb has been updated, but remains the last
	 * record.
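	 *
	 * The resulting invariants, as a sketch:
	 *
	 *	sb_mb == NULL             =>  sb_mbtail == NULL and
	 *	                              sb_lastrecord == NULL
	 *	sb_mb->m_nextpkt == NULL  =>  sb_lastrecord == sb_mb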
	 */
	if (sb->sb_mb == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (sb->sb_mb->m_nextpkt == NULL)
		sb->sb_lastrecord = sb->sb_mb;
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network for the entire time here, we release
 * the solock() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp,
    socklen_t controllen)
{
	struct mbuf *m, **mp;
	struct mbuf *cm;
	u_long len, offset, moff;
	int flags, error, s, type, uio_error = 0;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	size_t resid, orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (so->so_state & SS_NBIO)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		s = solock(so);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), NULL, curproc);
		sounlock(s);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    ulmin(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		m_freem(m);
		return (error);
	}
	if (mp)
		*mp = NULL;

restart:
	s = solock(so);
	if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
		sounlock(s);
		return (error);
	}

	m = so->so_rcv.sb_mb;
#ifdef SOCKET_SPLICE
	if (isspliced(so))
		m = NULL;
#endif /* SOCKET_SPLICE */
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
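	 *
	 * Worked example (sketch): with MSG_WAITALL set, sb_lowat == 1,
	 * and a request for 2 * sb_hiwat bytes, the full amount can never
	 * fit into the buffer at once, so the copy proceeds in sections
	 * and an interrupting signal yields a short count, not an error.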
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == NULL && so->so_rcv.sb_cc)
#ifdef SOCKET_SPLICE
		    if (!isspliced(so))
#endif /* SOCKET_SPLICE */
			panic("receive 1: so %p, so_type %d, sb_cc %lu",
			    so, so->so_type, so->so_rcv.sb_cc);
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else if (so->so_rcv.sb_cc == 0)
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv);
		error = sbwait(so, &so->so_rcv);
		sounlock(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * From this point onward, we maintain 'nextrecord' as a cache of the
	 * pointer to the next record in the socket buffer.  We must keep the
	 * various socket buffer pointers and local stack versions of the
	 * pointers in sync, pushing out modifications before operations that
	 * may sleep, and re-reading them afterwards.
	 *
	 * Otherwise, we will race with the network stack appending new data
	 * or records onto the socket buffer by using inconsistent/stale
	 * versions of the field, possibly resulting in socket buffer
	 * corruption.
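	 *
	 * The pattern used below is, as a sketch:
	 *
	 *	nextrecord = m->m_nextpkt;		(cache the pointer)
	 *	... remove mbufs from so_rcv.sb_mb ...
	 *	sbsync(&so->so_rcv, nextrecord);	(push state back)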
	 */
	if (uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgrcv++;
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				so->so_rcv.sb_mb = m_free(m);
				m = so->so_rcv.sb_mb;
			}
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			so->so_rcv.sb_mb = m->m_next;
			m->m_nextpkt = m->m_next = NULL;
			cm = m;
			m = so->so_rcv.sb_mb;
			sbsync(&so->so_rcv, nextrecord);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(cm, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS) {
					error =
					    (*pr->pr_domain->dom_externalize)
					    (cm, controllen, flags);
				}
				*controlp = cm;
			} else {
				/*
				 * Dispose of any SCM_RIGHTS message that went
				 * through the read path rather than recv.
				 */
				if (pr->pr_domain->dom_dispose &&
				    mtod(cm, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					pr->pr_domain->dom_dispose(cm);
				m_free(cm);
			}
		}
		if (m != NULL)
			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
		else
			nextrecord = so->so_rcv.sb_mb;
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}

	/* If m is non-NULL, we have some data to read. */
	if (m) {
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * The sockbuf must be consistent here (sb_mb points to the
		 * current mbuf, m_nextpkt to the next record) when we drop
		 * priority; we must note any additions to the sockbuf when
		 * we block interrupts again.
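		 *
		 * Sketch of the step below: the solock() is released
		 * around uiomove(); on a fault, uio_resid is rewound to
		 * "resid - len" so the count matches the bytes actually
		 * removed from the socket buffer.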
		 */
		if (mp == NULL && uio_error == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			resid = uio->uio_resid;
			sounlock(s);
			uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
			s = solock(so);
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = NULL;
				} else {
					so->so_rcv.sb_mb = m_free(m);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
				so->so_rcv.sb_datacc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			error = sbwait(so, &so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				sounlock(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, NULL,
			    (struct mbuf *)(long)flags, NULL, curproc);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		sounlock(s);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	sounlock(s);
	return (error);
}

int
soshutdown(struct socket *so, int how)
{
	struct protosw *pr = so->so_proto;
	int s, error = 0;

	s = solock(so);
	switch (how) {
	case SHUT_RD:
		sorflush(so);
		break;
	case SHUT_RDWR:
		sorflush(so);
		/* FALLTHROUGH */
	case SHUT_WR:
		error = (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL,
		    curproc);
		break;
	default:
		error = EINVAL;
		break;
	}
	sounlock(s);

	return (error);
}

void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	struct socket aso;

	sb->sb_flags |= SB_NOINTR;
	sblock(so, sb, M_WAITOK);
	socantrcvmore(so);
	sbunlock(sb);
	aso.so_proto = pr;
	aso.so_rcv = *sb;
	memset(sb, 0, sizeof (*sb));
	/* XXX - the memset stomps all over so_rcv */
	if (aso.so_rcv.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = aso.so_rcv.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(aso.so_rcv.sb_mb);
	sbrelease(&aso, &aso.so_rcv);
}

#ifdef SOCKET_SPLICE

#define so_splicelen	so_sp->ssp_len
#define so_splicemax	so_sp->ssp_max
#define so_idletv	so_sp->ssp_idletv
#define so_idleto	so_sp->ssp_idleto
#define so_splicetask	so_sp->ssp_task

int
sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
{
	struct file *fp;
	struct socket *sosp;
	struct sosplice *sp;
	int error = 0;

	soassertlocked(so);

	if (sosplice_taskq == NULL)
		sosplice_taskq = taskq_create("sosplice", 1, IPL_SOFTNET, 0);
	if (sosplice_taskq == NULL)
		return (ENOMEM);

	if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
		return (EPROTONOSUPPORT);
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (ENOTCONN);
	if (so->so_sp == NULL) {
		sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
		if (so->so_sp == NULL)
			so->so_sp = sp;
		else
			pool_put(&sosplice_pool, sp);
	}

	/* If no fd is given, unsplice by removing existing link. */
	if (fd < 0) {
		/* Lock receive buffer. */
		if ((error = sblock(so, &so->so_rcv,
		    (so->so_state & SS_NBIO) ?
		    M_NOWAIT : M_WAITOK)) != 0) {
			return (error);
		}
		if (so->so_sp->ssp_socket)
			sounsplice(so, so->so_sp->ssp_socket, 1);
		sbunlock(&so->so_rcv);
		return (0);
	}

	if (max && max < 0)
		return (EINVAL);

	if (tv && (tv->tv_sec < 0 || tv->tv_usec < 0))
		return (EINVAL);

	/* Find sosp, the drain socket into which data will be spliced. */
	if ((error = getsock(curproc, fd, &fp)) != 0)
		return (error);
	sosp = fp->f_data;
	if (sosp->so_sp == NULL) {
		sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
		if (sosp->so_sp == NULL)
			sosp->so_sp = sp;
		else
			pool_put(&sosplice_pool, sp);
	}

	/* Lock both receive and send buffer. */
	if ((error = sblock(so, &so->so_rcv,
	    (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0) {
		FRELE(fp, curproc);
		return (error);
	}
	if ((error = sblock(so, &sosp->so_snd, M_WAITOK)) != 0) {
		sbunlock(&so->so_rcv);
		FRELE(fp, curproc);
		return (error);
	}

	if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
		error = EBUSY;
		goto release;
	}
	if (sosp->so_proto->pr_usrreq != so->so_proto->pr_usrreq) {
		error = EPROTONOSUPPORT;
		goto release;
	}
	if (sosp->so_options & SO_ACCEPTCONN) {
		error = EOPNOTSUPP;
		goto release;
	}
	if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) {
		error = ENOTCONN;
		goto release;
	}

	/* Splice so and sosp together. */
	so->so_sp->ssp_socket = sosp;
	sosp->so_sp->ssp_soback = so;
	so->so_splicelen = 0;
	so->so_splicemax = max;
	if (tv)
		so->so_idletv = *tv;
	else
		timerclear(&so->so_idletv);
	timeout_set_proc(&so->so_idleto, soidle, so);
	task_set(&so->so_splicetask, sotask, so);

	/*
	 * To prevent softnet interrupt from calling somove() while
	 * we sleep, the socket buffers are not marked as spliced yet.
	 */
	if (somove(so, M_WAIT)) {
		so->so_rcv.sb_flagsintr |= SB_SPLICE;
		sosp->so_snd.sb_flagsintr |= SB_SPLICE;
	}

release:
	sbunlock(&sosp->so_snd);
	sbunlock(&so->so_rcv);
	FRELE(fp, curproc);
	return (error);
}

void
sounsplice(struct socket *so, struct socket *sosp, int wakeup)
{
	soassertlocked(so);

	task_del(sosplice_taskq, &so->so_splicetask);
	timeout_del(&so->so_idleto);
	sosp->so_snd.sb_flagsintr &= ~SB_SPLICE;
	so->so_rcv.sb_flagsintr &= ~SB_SPLICE;
	so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
	if (wakeup && soreadable(so))
		sorwakeup(so);
}

void
soidle(void *arg)
{
	struct socket *so = arg;
	int s;

	s = solock(so);
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		so->so_error = ETIMEDOUT;
		sounsplice(so, so->so_sp->ssp_socket, 1);
	}
	sounlock(s);
}

void
sotask(void *arg)
{
	struct socket *so = arg;
	int s;

	s = solock(so);
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		/*
		 * We may not sleep here as sofree() and unsplice() may be
		 * called from softnet interrupt context.  This would remove
		 * the socket during somove().
		 */
		somove(so, M_DONTWAIT);
	}
	sounlock(s);

	/* Avoid user land starvation. */
	yield();
}

/*
 * Move data from receive buffer of spliced source socket to send
 * buffer of drain socket.
 * Try to move as much as possible in one big chunk.  It is a TCP only
 * implementation.
 * A return value of 0 means splicing has finished; 1 means it should
 * continue.
 */
int
somove(struct socket *so, int wait)
{
	struct socket *sosp = so->so_sp->ssp_socket;
	struct mbuf *m, **mp, *nextrecord;
	u_long len, off, oobmark;
	long space;
	int error = 0, maxreached = 0;
	short state;

	soassertlocked(so);

nextpkt:
	if (so->so_error) {
		error = so->so_error;
		goto release;
	}
	if (sosp->so_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto release;
	}
	if (sosp->so_error && sosp->so_error != ETIMEDOUT &&
	    sosp->so_error != EFBIG && sosp->so_error != ELOOP) {
		error = sosp->so_error;
		goto release;
	}
	if ((sosp->so_state & SS_ISCONNECTED) == 0)
		goto release;

	/* Calculate how many bytes can be copied now. */
	len = so->so_rcv.sb_datacc;
	if (so->so_splicemax) {
		KASSERT(so->so_splicelen < so->so_splicemax);
		if (so->so_splicemax <= so->so_splicelen + len) {
			len = so->so_splicemax - so->so_splicelen;
			maxreached = 1;
		}
	}
	space = sbspace(sosp, &sosp->so_snd);
	if (so->so_oobmark && so->so_oobmark < len &&
	    so->so_oobmark < space + 1024)
		space += 1024;
	if (space <= 0) {
		maxreached = 0;
		goto release;
	}
	if (space < len) {
		maxreached = 0;
		if (space < sosp->so_snd.sb_lowat)
			goto release;
		len = space;
	}
	sosp->so_state |= SS_ISSENDING;

	SBLASTRECORDCHK(&so->so_rcv, "somove 1");
	SBLASTMBUFCHK(&so->so_rcv, "somove 1");
	m = so->so_rcv.sb_mb;
	if (m == NULL)
		goto release;
	nextrecord = m->m_nextpkt;

	/* Drop address and control information not used with splicing. */
	if (so->so_proto->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("somove soname: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
#endif
		m = m->m_next;
	}
	while (m && m->m_type == MT_CONTROL)
		m = m->m_next;
	if (m == NULL) {
		sbdroprecord(&so->so_rcv);
		if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb)
			(so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL,
			    NULL, NULL, NULL);
		goto nextpkt;
	}

	/*
	 * By splicing sockets connected to localhost, userland might create a
	 * loop.  Dissolve splicing with error if loop is detected by counter.
	 */
	if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) {
		error = ELOOP;
		goto release;
	}

	if (so->so_proto->pr_flags & PR_ATOMIC) {
		if ((m->m_flags & M_PKTHDR) == 0)
			panic("somove !PKTHDR: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
		if (sosp->so_snd.sb_hiwat < m->m_pkthdr.len) {
			error = EMSGSIZE;
			goto release;
		}
		if (len < m->m_pkthdr.len)
			goto release;
		if (m->m_pkthdr.len < len) {
			maxreached = 0;
			len = m->m_pkthdr.len;
		}
		/*
		 * Throw away the name mbuf after it has been assured
		 * that the whole first record can be processed.
		 */
		m = so->so_rcv.sb_mb;
		sbfree(&so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		sbsync(&so->so_rcv, nextrecord);
	}
	/*
	 * Throw away the control mbufs after it has been assured
	 * that the whole first record can be processed.
	 */
	m = so->so_rcv.sb_mb;
	while (m && m->m_type == MT_CONTROL) {
		sbfree(&so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		m = so->so_rcv.sb_mb;
		sbsync(&so->so_rcv, nextrecord);
	}

	SBLASTRECORDCHK(&so->so_rcv, "somove 2");
	SBLASTMBUFCHK(&so->so_rcv, "somove 2");

	/* Take at most len mbufs out of receive buffer. */
	for (off = 0, mp = &m; off <= len && *mp;
	    off += (*mp)->m_len, mp = &(*mp)->m_next) {
		u_long size = len - off;

#ifdef DIAGNOSTIC
		if ((*mp)->m_type != MT_DATA && (*mp)->m_type != MT_HEADER)
			panic("somove type: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, *mp, (*mp)->m_type);
#endif
		if ((*mp)->m_len > size) {
			/*
			 * Move only a partial mbuf at maximum splice length or
			 * if the drain buffer is too small for this large mbuf.
			 */
			if (!maxreached && so->so_snd.sb_datacc > 0) {
				len -= size;
				break;
			}
			*mp = m_copym(so->so_rcv.sb_mb, 0, size, wait);
			if (*mp == NULL) {
				len -= size;
				break;
			}
			so->so_rcv.sb_mb->m_data += size;
			so->so_rcv.sb_mb->m_len -= size;
			so->so_rcv.sb_cc -= size;
			so->so_rcv.sb_datacc -= size;
		} else {
			*mp = so->so_rcv.sb_mb;
			sbfree(&so->so_rcv, *mp);
			so->so_rcv.sb_mb = (*mp)->m_next;
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	*mp = NULL;

	SBLASTRECORDCHK(&so->so_rcv, "somove 3");
	SBLASTMBUFCHK(&so->so_rcv, "somove 3");
	SBCHECK(&so->so_rcv);
	if (m == NULL)
		goto release;
	m->m_nextpkt = NULL;
	if (m->m_flags & M_PKTHDR) {
		m_resethdr(m);
		m->m_pkthdr.len = len;
	}

	/* Send window update to source peer as receive buffer has changed. */
	if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb)
		(so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL,
		    NULL, NULL, NULL);

	/* The receive buffer has shrunk by len bytes; adjust oob state. */
	state = so->so_state;
	so->so_state &= ~SS_RCVATMARK;
	oobmark = so->so_oobmark;
	so->so_oobmark = oobmark > len ? oobmark - len : 0;
	if (oobmark) {
		if (oobmark == len)
			so->so_state |= SS_RCVATMARK;
		if (oobmark >= len)
			oobmark = 0;
	}

	/*
	 * Handle oob data.  If any malloc fails, ignore error.
	 * TCP urgent data is not very reliable anyway.
	 */
	while (((state & SS_RCVATMARK) || oobmark) &&
	    (so->so_options & SO_OOBINLINE)) {
		struct mbuf *o = NULL;

		if (state & SS_RCVATMARK) {
			o = m_get(wait, MT_DATA);
			state &= ~SS_RCVATMARK;
		} else if (oobmark) {
			o = m_split(m, oobmark, wait);
			if (o) {
				error = (*sosp->so_proto->pr_usrreq)(sosp,
				    PRU_SEND, m, NULL, NULL, NULL);
				if (error) {
					if (sosp->so_state & SS_CANTSENDMORE)
						error = EPIPE;
					m_freem(o);
					goto release;
				}
				len -= oobmark;
				so->so_splicelen += oobmark;
				m = o;
				o = m_get(wait, MT_DATA);
			}
			oobmark = 0;
		}
		if (o) {
			o->m_len = 1;
			*mtod(o, caddr_t) = *mtod(m, caddr_t);
			error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SENDOOB,
			    o, NULL, NULL, NULL);
			if (error) {
				if (sosp->so_state & SS_CANTSENDMORE)
					error = EPIPE;
				m_freem(m);
				goto release;
			}
			len -= 1;
			so->so_splicelen += 1;
			if (oobmark) {
				oobmark -= 1;
				if (oobmark == 0)
					state |= SS_RCVATMARK;
			}
			m_adj(m, 1);
		}
	}

	/*
	 * Append all remaining data to drain socket.
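	 * The chain m is handed to the drain socket's protocol through
	 * PRU_SEND; for TCP this is queued and later pushed out by
	 * tcp_output() (a sketch of the common case, compare the comment
	 * in sorwakeup() below).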
	 */
	if (so->so_rcv.sb_cc == 0 || maxreached)
		sosp->so_state &= ~SS_ISSENDING;
	error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SEND, m, NULL, NULL,
	    NULL);
	if (error) {
		if (sosp->so_state & SS_CANTSENDMORE)
			error = EPIPE;
		goto release;
	}
	so->so_splicelen += len;

	/* Move several packets if possible. */
	if (!maxreached && nextrecord)
		goto nextpkt;

release:
	sosp->so_state &= ~SS_ISSENDING;
	if (!error && maxreached && so->so_splicemax == so->so_splicelen)
		error = EFBIG;
	if (error)
		so->so_error = error;
	if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) ||
	    (sosp->so_state & SS_CANTSENDMORE) || maxreached || error) {
		sounsplice(so, sosp, 1);
		return (0);
	}
	if (timerisset(&so->so_idletv))
		timeout_add_tv(&so->so_idleto, &so->so_idletv);
	return (1);
}

#endif /* SOCKET_SPLICE */

void
sorwakeup(struct socket *so)
{
	soassertlocked(so);

#ifdef SOCKET_SPLICE
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		/*
		 * TCP has a sendbuffer that can handle multiple packets
		 * at once.  So queue the stream a bit to accumulate data.
		 * The sosplice thread will call somove() later and send
		 * the packets calling tcp_output() only once.
		 * In the UDP case, send out the packets immediately.
		 * Using a thread would make things slower.
		 */
		if (so->so_proto->pr_flags & PR_WANTRCVD)
			task_add(sosplice_taskq, &so->so_splicetask);
		else
			somove(so, M_DONTWAIT);
	}
	if (isspliced(so))
		return;
#endif
	sowakeup(so, &so->so_rcv);
	if (so->so_upcall)
		(*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
}

void
sowwakeup(struct socket *so)
{
	soassertlocked(so);

#ifdef SOCKET_SPLICE
	if (so->so_snd.sb_flagsintr & SB_SPLICE)
		task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask);
#endif
	sowakeup(so, &so->so_snd);
}

int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
{
	int error = 0;
	struct mbuf *m = m0;

	soassertlocked(so);

	if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
			    level, optname, m0);
			return (error);
		}
		error = ENOPROTOOPT;
	} else {
		switch (optname) {
		case SO_BINDANY:
			if ((error = suser(curproc, 0)) != 0)	/* XXX */
				goto bad;
			break;
		}

		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* FALLTHROUGH */

		case SO_BINDANY:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_ZEROIZE:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_DONTROUTE:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				error = EOPNOTSUPP;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			cnt = *mtod(m, int *);
			if ((long)cnt <= 0)
				cnt = 1;
			switch (optname) {

			case SO_SNDBUF:
				if (so->so_state & SS_CANTSENDMORE) {
					error = EINVAL;
					goto bad;
				}
				if (sbcheckreserve(cnt, so->so_snd.sb_wat) ||
				    sbreserve(so, &so->so_snd, cnt)) {
					error = ENOBUFS;
					goto bad;
				}
				so->so_snd.sb_wat = cnt;
				break;

			case SO_RCVBUF:
				if (so->so_state & SS_CANTRCVMORE) {
					error = EINVAL;
					goto bad;
				}
				if (sbcheckreserve(cnt, so->so_rcv.sb_wat) ||
				    sbreserve(so, &so->so_rcv, cnt)) {
					error = ENOBUFS;
					goto bad;
				}
				so->so_rcv.sb_wat = cnt;
				break;

			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (cnt > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : cnt;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (cnt > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : cnt;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval tv;
			int val;

			if (m == NULL || m->m_len < sizeof (tv)) {
				error = EINVAL;
				goto bad;
			}
			memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
			val = tvtohz(&tv);
			if (val > USHRT_MAX) {
				error = EDOM;
				goto bad;
			}

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				struct domain *dom = so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_SETOPT, so, level, optname, m0);
				return (error);
			}
			error = ENOPROTOOPT;
			break;

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
			if (m == NULL) {
				error = sosplice(so, -1, 0, NULL);
			} else if (m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			} else if (m->m_len < sizeof(struct splice)) {
				error = sosplice(so, *mtod(m, int *), 0, NULL);
			} else {
				error = sosplice(so,
				    mtod(m, struct splice *)->sp_fd,
				    mtod(m, struct splice *)->sp_max,
				    &mtod(m, struct splice *)->sp_idle);
			}
			break;
#endif /* SOCKET_SPLICE */

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto->pr_ctloutput) {
			(*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
			    level, optname, m0);
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

int
sogetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
	int error = 0;

	soassertlocked(so);

	if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			m->m_len = 0;

			error = (*so->so_proto->pr_ctloutput)(PRCO_GETOPT, so,
			    level, optname, m);
			if (error)
				return (error);
			return (0);
		} else
			return (ENOPROTOOPT);
	} else {
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_BINDANY:
		case SO_USELOOPBACK:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_ZEROIZE:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_DONTROUTE:
			*mtod(m, int *) = 0;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval tv;
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			memset(&tv, 0, sizeof(tv));
			tv.tv_sec = val / hz;
			tv.tv_usec = (val % hz) * tick;
			memcpy(mtod(m, struct timeval *), &tv, sizeof tv);
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				struct domain *dom = so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_GETOPT, so, level, optname, m);
				if (error)
					return (error);
				break;
			}
			return (ENOPROTOOPT);

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
		    {
			off_t len;

			m->m_len = sizeof(off_t);
			len = so->so_sp ?
			    so->so_sp->ssp_len : 0;
			memcpy(mtod(m, off_t *), &len, sizeof(off_t));
			break;
		    }
#endif /* SOCKET_SPLICE */

		case SO_PEERCRED:
			if (so->so_proto->pr_protocol == AF_UNIX) {
				struct unpcb *unp = sotounpcb(so);

				if (unp->unp_flags & UNP_FEIDS) {
					m->m_len = sizeof(unp->unp_connid);
					memcpy(mtod(m, caddr_t),
					    &(unp->unp_connid), m->m_len);
					break;
				}
				return (ENOTCONN);
			}
			return (EOPNOTSUPP);

		default:
			return (ENOPROTOOPT);
		}
		return (0);
	}
}

void
sohasoutofband(struct socket *so)
{
	KERNEL_ASSERT_LOCKED();
	csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid);
	selwakeup(&so->so_rcv.sb_sel);
}

int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	struct sockbuf *sb;

	KERNEL_ASSERT_LOCKED();

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (EINVAL);
	}

	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;

	return (0);
}

void
filt_sordetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	KERNEL_ASSERT_LOCKED();

	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
}

int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	kn->kn_data = so->so_rcv.sb_cc;
#ifdef SOCKET_SPLICE
	if (isspliced(so)) {
		rv = 0;
	} else
#endif /* SOCKET_SPLICE */
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		rv = 1;
	} else if (so->so_error) {	/* temporary udp error */
		rv = 1;
	} else if (kn->kn_sfflags & NOTE_LOWAT) {
		rv = (kn->kn_data >= kn->kn_sdata);
	} else {
		rv = (kn->kn_data >= so->so_rcv.sb_lowat);
	}

	return rv;
}

void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	KERNEL_ASSERT_LOCKED();

	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
}

int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	kn->kn_data = sbspace(so, &so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		rv = 1;
	} else if (so->so_error) {	/* temporary udp error */
		rv = 1;
	} else if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
		rv = 0;
	} else if (kn->kn_sfflags & NOTE_LOWAT) {
		rv = (kn->kn_data >= kn->kn_sdata);
	} else {
		rv = (kn->kn_data >= so->so_snd.sb_lowat);
	}

	return (rv);
}

int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;

	return (kn->kn_data != 0);
}

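/*
 * Illustrative sketch only (userland code, not compiled here): the
 * filters above are reached through kevent(2), e.g.
 *
 *	struct kevent kev;
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 * A listening socket gets solisten_filtops and reports so_qlen in
 * kn_data; other sockets get soread_filtops and report so_rcv.sb_cc.
 */
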
#ifdef DDB
void
sobuf_print(struct sockbuf *,
    int (*)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))));

void
sobuf_print(struct sockbuf *sb,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	(*pr)("\tsb_cc: %lu\n", sb->sb_cc);
	(*pr)("\tsb_datacc: %lu\n", sb->sb_datacc);
	(*pr)("\tsb_hiwat: %lu\n", sb->sb_hiwat);
	(*pr)("\tsb_wat: %lu\n", sb->sb_wat);
	(*pr)("\tsb_mbcnt: %lu\n", sb->sb_mbcnt);
	(*pr)("\tsb_mbmax: %lu\n", sb->sb_mbmax);
	(*pr)("\tsb_lowat: %ld\n", sb->sb_lowat);
	(*pr)("\tsb_mb: %p\n", sb->sb_mb);
	(*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
	(*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
	(*pr)("\tsb_sel: ...\n");
	(*pr)("\tsb_flagsintr: %d\n", sb->sb_flagsintr);
	(*pr)("\tsb_flags: %i\n", sb->sb_flags);
	(*pr)("\tsb_timeo: %i\n", sb->sb_timeo);
}

void
so_print(void *v,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct socket *so = v;

	(*pr)("socket %p\n", so);
	(*pr)("so_type: %i\n", so->so_type);
	(*pr)("so_options: 0x%04x\n", so->so_options);	/* %b */
	(*pr)("so_linger: %i\n", so->so_linger);
	(*pr)("so_state: 0x%04x\n", so->so_state);
	(*pr)("so_pcb: %p\n", so->so_pcb);
	(*pr)("so_proto: %p\n", so->so_proto);

	(*pr)("so_head: %p\n", so->so_head);
	(*pr)("so_onq: %p\n", so->so_onq);
	(*pr)("so_q0: @%p first: %p\n", &so->so_q0, TAILQ_FIRST(&so->so_q0));
	(*pr)("so_q: @%p first: %p\n", &so->so_q, TAILQ_FIRST(&so->so_q));
	(*pr)("so_eq: next: %p\n", TAILQ_NEXT(so, so_qe));
	(*pr)("so_q0len: %i\n", so->so_q0len);
	(*pr)("so_qlen: %i\n", so->so_qlen);
	(*pr)("so_qlimit: %i\n", so->so_qlimit);
	(*pr)("so_timeo: %i\n", so->so_timeo);
	(*pr)("so_pgid: %i\n", so->so_pgid);
	(*pr)("so_siguid: %i\n", so->so_siguid);
	(*pr)("so_sigeuid: %i\n", so->so_sigeuid);
	(*pr)("so_oobmark: %lu\n", so->so_oobmark);

	(*pr)("so_sp: %p\n", so->so_sp);
	if (so->so_sp != NULL) {
		(*pr)("\tssp_socket: %p\n", so->so_sp->ssp_socket);
		(*pr)("\tssp_soback: %p\n", so->so_sp->ssp_soback);
		(*pr)("\tssp_len: %lld\n",
		    (unsigned long long)so->so_sp->ssp_len);
		(*pr)("\tssp_max: %lld\n",
		    (unsigned long long)so->so_sp->ssp_max);
		(*pr)("\tssp_idletv: %lld %ld\n", so->so_sp->ssp_idletv.tv_sec,
		    so->so_sp->ssp_idletv.tv_usec);
		(*pr)("\tssp_idleto: %spending (@%i)\n",
		    timeout_pending(&so->so_sp->ssp_idleto) ? "" : "not ",
		    so->so_sp->ssp_idleto.to_time);
	}

	(*pr)("so_rcv:\n");
	sobuf_print(&so->so_rcv, pr);
	(*pr)("so_snd:\n");
	sobuf_print(&so->so_snd, pr);

	(*pr)("so_upcall: %p so_upcallarg: %p\n",
	    so->so_upcall, so->so_upcallarg);

	(*pr)("so_euid: %d so_ruid: %d\n", so->so_euid, so->so_ruid);
	(*pr)("so_egid: %d so_rgid: %d\n", so->so_egid, so->so_rgid);
	(*pr)("so_cpid: %d\n", so->so_cpid);
}
#endif