/*	$OpenBSD: uipc_socket.c,v 1.161 2016/09/20 14:27:43 bluhm Exp $	*/
/*	$NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/unpcb.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <net/if.h>
#include <sys/pool.h>

#ifdef DDB
#include <machine/db_machdep.h>
#endif

void	sbsync(struct sockbuf *, struct mbuf *);

int	sosplice(struct socket *, int, off_t, struct timeval *);
void	sounsplice(struct socket *, struct socket *, int);
void	soidle(void *);
void	sotask(void *);
int	somove(struct socket *, int);

void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_solisten(struct knote *kn, long hint);

struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };


#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

struct pool socket_pool;
#ifdef SOCKET_SPLICE
struct pool sosplice_pool;
struct taskq *sosplice_taskq;
#endif

void
soinit(void)
{
	pool_init(&socket_pool, sizeof(struct socket), 0, IPL_SOFTNET, 0,
	    "sockpl", NULL);
#ifdef SOCKET_SPLICE
	pool_init(&sosplice_pool, sizeof(struct sosplice), 0, IPL_SOFTNET, 0,
	    "sosppl", NULL);
#endif
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
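/*
 * Illustrative userland sketch (not part of this file; the function name
 * is invented): the socket(2), bind(2) and listen(2) system calls end up
 * in socreate(), sobind() and solisten() below.  A minimal TCP listener:
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>

int
example_listener(void)
{
	struct sockaddr_in sin;
	int s;

	/* socket(2) -> socreate(AF_INET, ..., SOCK_STREAM, 0) */
	if ((s = socket(AF_INET, SOCK_STREAM, 0)) == -1)
		return (-1);

	memset(&sin, 0, sizeof(sin));
	sin.sin_len = sizeof(sin);
	sin.sin_family = AF_INET;
	sin.sin_port = htons(8080);

	/* bind(2) -> sobind() */
	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) == -1)
		return (-1);
	/* listen(2) -> solisten(); backlog is clamped to somaxconn */
	if (listen(s, 5) == -1)
		return (-1);
	return (s);
}
#endif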
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p = curproc;		/* XXX */
	struct protosw *prp;
	struct socket *so;
	int error, s;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == NULL || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	if (suser(p, 0) == 0)
		so->so_state = SS_PRIV;
	so->so_ruid = p->p_ucred->cr_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_rgid = p->p_ucred->cr_rgid;
	so->so_egid = p->p_ucred->cr_gid;
	so->so_cpid = p->p_p->ps_pid;
	so->so_proto = prp;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
	    (struct mbuf *)(long)proto, NULL, p);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
	splx(s);
	*aso = so;
	return (0);
}

int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	int s = splsoftnet();
	int error;

	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
	splx(s);
	return (error);
}

int
solisten(struct socket *so, int backlog)
{
	int s, error;

	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
		return (EOPNOTSUPP);
#ifdef SOCKET_SPLICE
	if (isspliced(so) || issplicedback(so))
		return (EOPNOTSUPP);
#endif /* SOCKET_SPLICE */
	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
	    curproc);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	if (backlog < sominconn)
		backlog = sominconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Must be called at splsoftnet()
 */

void
sofree(struct socket *so)
{
	splsoftassert(IPL_SOFTNET);

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
#ifdef SOCKET_SPLICE
	if (so->so_sp) {
		if (issplicedback(so))
			sounsplice(so->so_sp->ssp_soback, so,
			    so->so_sp->ssp_soback != so);
		if (isspliced(so))
			sounsplice(so, so->so_sp->ssp_socket, 0);
		pool_put(&sosplice_pool, so->so_sp);
		so->so_sp = NULL;
	}
#endif /* SOCKET_SPLICE */
	sbrelease(&so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so)
{
	struct socket *so2;
	int s = splsoftnet();		/* conservative */
	int error = 0;

	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep(&so->so_timeo,
				    PSOCK | PCATCH, "netcls",
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, NULL,
		    NULL, NULL, curproc);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splsoftnet.
 */
int
soabort(struct socket *so)
{
	splsoftassert(IPL_SOFTNET);

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL,
	    curproc);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int error = 0;

	splsoftassert(IPL_SOFTNET);

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL,
		    nam, NULL, curproc);
	else
		error = ECONNABORTED;
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    NULL, nam, NULL, curproc);
	splx(s);
	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int s = splsoftnet();
	int error;

	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
	    (struct mbuf *)so2, NULL, curproc);
	splx(s);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int error;

	splsoftassert(IPL_SOFTNET);

	if ((so->so_state & SS_ISCONNECTED) == 0)
		return (ENOTCONN);
	if (so->so_state & SS_ISDISCONNECTING)
		return (EALREADY);
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
	    NULL, curproc);
	return (error);
}
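/*
 * Illustrative sketch (not compiled here; the function name is invented):
 * as the comment in soconnect() notes, a connected datagram socket can be
 * disconnected from userland by connecting to a "null address",
 * conventionally a zeroed sockaddr with AF_UNSPEC:
 */
#if 0
#include <sys/socket.h>
#include <string.h>

int
example_udp_disconnect(int s)
{
	struct sockaddr sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_family = AF_UNSPEC;
	/* soconnect() sees a connected socket and calls sodisconnect() */
	return (connect(s, &sa, sizeof(sa)));
}
#endif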
int m_getuio(struct mbuf **, int, long, struct uio *);

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
	long space, clen = 0;
	size_t resid;
	int error, s;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/* MSG_EOR on a SOCK_STREAM socket is invalid. */
	if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		error = EINVAL;
		goto out;
	}
	if (uio && uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgsnd++;
	if (control) {
		/*
		 * In theory clen should be unsigned (since control->m_len is).
		 * However, space must be signed, as it might be less than 0
		 * if we over-committed, and we must use a signed comparison
		 * of space and clen.
		 */
		clen = control->m_len;
		/* reserve extra space for AF_LOCAL's internalize */
		if (so->so_proto->pr_domain->dom_family == AF_LOCAL &&
		    clen >= CMSG_ALIGN(sizeof(struct cmsghdr)) &&
		    mtod(control, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
			clen = CMSG_SPACE(
			    (clen - CMSG_ALIGN(sizeof(struct cmsghdr))) *
			    (sizeof(struct file *) / sizeof(int)));
	}

#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	so->so_state |= SS_ISSENDING;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if (!(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    (so->so_proto->pr_domain->dom_family != AF_LOCAL &&
		    clen > so->so_snd.sb_hiwat))
			snderr(EMSGSIZE);
		if (space < clen ||
		    (space - clen < resid &&
		    (atomic || space < so->so_snd.sb_lowat))) {
			if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT))
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			so->so_state &= ~SS_ISSENDING;
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				error = m_getuio(&top, atomic,
				    space, uio);
				if (error)
					goto release;
				space -= top->m_pkthdr.len;
				resid = uio->uio_resid;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			}
			s = splsoftnet();		/* XXX */
			if (resid == 0)
				so->so_state &= ~SS_ISSENDING;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, curproc);
			splx(s);
			clen = 0;
			control = NULL;
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	so->so_state &= ~SS_ISSENDING;
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

int
m_getuio(struct mbuf **mp, int atomic, long space, struct uio *uio)
{
	struct mbuf *m, *top = NULL;
	struct mbuf **nextp = &top;
	u_long len, mlen;
	size_t resid = uio->uio_resid;
	int error;

	do {
		if (top == NULL) {
			MGETHDR(m, M_WAIT, MT_DATA);
			mlen = MHLEN;
			m->m_pkthdr.len = 0;
			m->m_pkthdr.ph_ifidx = 0;
		} else {
			MGET(m, M_WAIT, MT_DATA);
			mlen = MLEN;
		}
		/* chain mbuf together */
		*nextp = m;
		nextp = &m->m_next;

		resid = ulmin(resid, space);
		if (resid >= MINCLSIZE) {
			MCLGETI(m, M_NOWAIT, NULL, ulmin(resid, MAXMCLBYTES));
			if ((m->m_flags & M_EXT) == 0)
				MCLGETI(m, M_NOWAIT, NULL, MCLBYTES);
			if ((m->m_flags & M_EXT) == 0)
				goto nopages;
			mlen = m->m_ext.ext_size;
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && top == NULL && len < mlen - max_hdr)
				m->m_data += max_hdr;
		} else {
nopages:
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && top == NULL && len < mlen - max_hdr)
				MH_ALIGN(m, len);
		}

		error = uiomove(mtod(m, caddr_t), len, uio);
		if (error) {
			m_freem(top);
			return (error);
		}

		/* adjust counters */
		resid = uio->uio_resid;
		space -= len;
		m->m_len = len;
		top->m_pkthdr.len += len;

		/* Is there more space and more data? */
	} while (space > 0 && resid > 0);

	*mp = top;
	return 0;
}
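/*
 * Illustrative sketch (not compiled here; the function name is invented):
 * sosend() above enforces the "all at once" rule, so on a datagram socket
 * a message larger than the send buffer high-water mark fails with
 * EMSGSIZE instead of blocking; on a stream socket callers must expect
 * short counts when interrupted, as the header comment warns:
 */
#if 0
#include <sys/socket.h>
#include <errno.h>

ssize_t
example_write_all(int s, const char *buf, size_t len)
{
	size_t off = 0;
	ssize_t n;

	while (off < len) {
		n = send(s, buf + off, len - off, 0);
		if (n == -1) {
			/* sosend() may return EINTR with data already sent */
			if (errno == EINTR)
				continue;
			return (-1);
		}
		off += n;
	}
	return (off);
}
#endif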
/*
 * Following replacement or removal of the first mbuf on the first
 * mbuf chain of a socket buffer, push necessary state changes back
 * into the socket buffer so that other consumers see the values
 * consistently.  'nextrecord' is the caller's locally stored value of
 * the original value of sb->sb_mb->m_nextpkt which must be restored
 * when the lead mbuf changes.  NOTE: 'nextrecord' may be NULL.
 */
void
sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
{

	/*
	 * First, update for the new value of nextrecord.  If necessary,
	 * make it the first record.
	 */
	if (sb->sb_mb != NULL)
		sb->sb_mb->m_nextpkt = nextrecord;
	else
		sb->sb_mb = nextrecord;

	/*
	 * Now update any dependent socket buffer fields to reflect
	 * the new state.  This is an inline of SB_EMPTY_FIXUP, with
	 * the addition of a second clause that takes care of the
	 * case where sb_mb has been updated, but remains the last
	 * record.
	 */
	if (sb->sb_mb == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (sb->sb_mb->m_nextpkt == NULL)
		sb->sb_lastrecord = sb->sb_mb;
}
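/*
 * Illustrative picture (informational only, added for exposition): the
 * receive buffer consumed by soreceive() below is a list of records
 * chained through m_nextpkt, each record an mbuf chain through m_next:
 *
 *	sb_mb -> MT_SONAME -> MT_CONTROL -> MT_DATA -> MT_DATA
 *	   |
 *	m_nextpkt
 *	   |
 *	   v
 *	MT_SONAME -> MT_DATA				<- sb_lastrecord
 *
 * sbsync() above keeps sb_mb, sb_mbtail and sb_lastrecord consistent
 * while the head record is being dismantled.
 */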
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp,
    socklen_t controllen)
{
	struct mbuf *m, **mp;
	struct mbuf *cm;
	u_long len, offset, moff;
	int flags, error, s, type, uio_error = 0;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	size_t resid, orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (so->so_state & SS_NBIO)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), NULL, curproc);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    ulmin(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = NULL;

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
#ifdef SOCKET_SPLICE
	if (isspliced(so))
		m = NULL;
#endif /* SOCKET_SPLICE */
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == NULL && so->so_rcv.sb_cc)
#ifdef SOCKET_SPLICE
		    if (!isspliced(so))
#endif /* SOCKET_SPLICE */
			panic("receive 1: so %p, so_type %d, sb_cc %lu",
			    so, so->so_type, so->so_rcv.sb_cc);
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else if (so->so_rcv.sb_cc == 0)
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * From this point onward, we maintain 'nextrecord' as a cache of the
	 * pointer to the next record in the socket buffer.  We must keep the
	 * various socket buffer pointers and local stack versions of the
	 * pointers in sync, pushing out modifications before operations that
	 * may sleep, and re-reading them afterwards.
	 *
	 * Otherwise, we will race with the network stack appending new data
	 * or records onto the socket buffer by using inconsistent/stale
	 * versions of the field, possibly resulting in socket buffer
	 * corruption.
	 */
	if (uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgrcv++;
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				so->so_rcv.sb_mb = m_free(m);
				m = so->so_rcv.sb_mb;
			}
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			so->so_rcv.sb_mb = m->m_next;
			m->m_nextpkt = m->m_next = NULL;
			cm = m;
			m = so->so_rcv.sb_mb;
			sbsync(&so->so_rcv, nextrecord);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(cm, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(cm,
					    controllen, flags);
				*controlp = cm;
			} else {
				/*
				 * Dispose of any SCM_RIGHTS message that went
				 * through the read path rather than recv.
				 */
				if (pr->pr_domain->dom_dispose &&
				    mtod(cm, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
					pr->pr_domain->dom_dispose(cm);
				m_free(cm);
			}
		}
		if (m != NULL)
			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
		else
			nextrecord = so->so_rcv.sb_mb;
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}

	/* If m is non-NULL, we have some data to read. */
	if (m) {
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == NULL && uio_error == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			resid = uio->uio_resid;
			splx(s);
			uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
			s = splsoftnet();
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = NULL;
				} else {
					so->so_rcv.sb_mb = m_free(m);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
				so->so_rcv.sb_datacc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, NULL,
			    (struct mbuf *)(long)flags, NULL, curproc);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
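/*
 * Illustrative sketch (not compiled here; the function name is invented):
 * MSG_WAITALL asks soreceive() above to keep blocking until the full
 * amount has arrived, provided it fits in the receive buffer
 * (resid <= sb_hiwat); a signal or EOF can still produce a short count,
 * which the caller must check:
 */
#if 0
#include <sys/socket.h>

ssize_t
example_read_exact(int s, void *buf, size_t len)
{
	/* may still return less than len on EOF, signal or error */
	return (recv(s, buf, len, MSG_WAITALL));
}
#endif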
int
soshutdown(struct socket *so, int how)
{
	struct protosw *pr = so->so_proto;
	int s, error = 0;

	s = splsoftnet();
	switch (how) {
	case SHUT_RD:
	case SHUT_RDWR:
		sorflush(so);
		if (how == SHUT_RD)
			break;
		/* FALLTHROUGH */
	case SHUT_WR:
		error = (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL,
		    curproc);
		break;
	default:
		error = EINVAL;
		break;
	}
	splx(s);
	return (error);
}

void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splnet();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	memset(sb, 0, sizeof (*sb));
	/* XXX - the memset stomps all over so_rcv */
	if (asb.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

#ifdef SOCKET_SPLICE

#define so_splicelen	so_sp->ssp_len
#define so_splicemax	so_sp->ssp_max
#define so_idletv	so_sp->ssp_idletv
#define so_idleto	so_sp->ssp_idleto
#define so_splicetask	so_sp->ssp_task
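/*
 * Illustrative sketch (not compiled here; the function name is invented):
 * userland arms socket splicing with setsockopt(2).  Passing a plain int
 * file descriptor splices all data; the struct splice form adds a byte
 * maximum and an idle timeout, which somove()/soidle() below enforce with
 * EFBIG and ETIMEDOUT.  A NULL optval unsplices:
 */
#if 0
#include <sys/socket.h>
#include <string.h>

int
example_splice(int from, int to, off_t max)
{
	struct splice sp;

	memset(&sp, 0, sizeof(sp));
	sp.sp_fd = to;
	sp.sp_max = max;		/* 0 means no byte limit */
	sp.sp_idle.tv_sec = 30;		/* dissolve after 30s of idleness */
	return (setsockopt(from, SOL_SOCKET, SO_SPLICE, &sp, sizeof(sp)));
}
#endif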
int
sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
{
	struct file *fp;
	struct socket *sosp;
	int s, error = 0;

	if (sosplice_taskq == NULL)
		sosplice_taskq = taskq_create("sosplice", 1, IPL_SOFTNET,
		    TASKQ_CANTSLEEP);
	if (sosplice_taskq == NULL)
		return (ENOMEM);

	if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
		return (EPROTONOSUPPORT);
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (ENOTCONN);
	if (so->so_sp == NULL)
		so->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);

	/* If no fd is given, unsplice by removing existing link. */
	if (fd < 0) {
		/* Lock receive buffer. */
		if ((error = sblock(&so->so_rcv,
		    (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0)
			return (error);
		s = splsoftnet();
		if (so->so_sp->ssp_socket)
			sounsplice(so, so->so_sp->ssp_socket, 1);
		splx(s);
		sbunlock(&so->so_rcv);
		return (0);
	}

	if (max && max < 0)
		return (EINVAL);

	if (tv && (tv->tv_sec < 0 || tv->tv_usec < 0))
		return (EINVAL);

	/* Find sosp, the drain socket where data will be spliced into. */
	if ((error = getsock(curproc, fd, &fp)) != 0)
		return (error);
	sosp = fp->f_data;
	if (sosp->so_sp == NULL)
		sosp->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);

	/* Lock both receive and send buffer. */
	if ((error = sblock(&so->so_rcv,
	    (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0) {
		FRELE(fp, curproc);
		return (error);
	}
	if ((error = sblock(&sosp->so_snd, M_WAITOK)) != 0) {
		sbunlock(&so->so_rcv);
		FRELE(fp, curproc);
		return (error);
	}
	s = splsoftnet();

	if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
		error = EBUSY;
		goto release;
	}
	if (sosp->so_proto->pr_usrreq != so->so_proto->pr_usrreq) {
		error = EPROTONOSUPPORT;
		goto release;
	}
	if (sosp->so_options & SO_ACCEPTCONN) {
		error = EOPNOTSUPP;
		goto release;
	}
	if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) {
		error = ENOTCONN;
		goto release;
	}

	/* Splice so and sosp together. */
	so->so_sp->ssp_socket = sosp;
	sosp->so_sp->ssp_soback = so;
	so->so_splicelen = 0;
	so->so_splicemax = max;
	if (tv)
		so->so_idletv = *tv;
	else
		timerclear(&so->so_idletv);
	timeout_set(&so->so_idleto, soidle, so);
	task_set(&so->so_splicetask, sotask, so);

	/*
	 * To prevent softnet interrupt from calling somove() while
	 * we sleep, the socket buffers are not marked as spliced yet.
	 */
	if (somove(so, M_WAIT)) {
		so->so_rcv.sb_flagsintr |= SB_SPLICE;
		sosp->so_snd.sb_flagsintr |= SB_SPLICE;
	}

release:
	splx(s);
	sbunlock(&sosp->so_snd);
	sbunlock(&so->so_rcv);
	FRELE(fp, curproc);
	return (error);
}

void
sounsplice(struct socket *so, struct socket *sosp, int wakeup)
{
	splsoftassert(IPL_SOFTNET);

	task_del(sosplice_taskq, &so->so_splicetask);
	timeout_del(&so->so_idleto);
	sosp->so_snd.sb_flagsintr &= ~SB_SPLICE;
	so->so_rcv.sb_flagsintr &= ~SB_SPLICE;
	so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
	if (wakeup && soreadable(so))
		sorwakeup(so);
}

void
soidle(void *arg)
{
	struct socket *so = arg;
	int s;

	s = splsoftnet();
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		so->so_error = ETIMEDOUT;
		sounsplice(so, so->so_sp->ssp_socket, 1);
	}
	splx(s);
}

void
sotask(void *arg)
{
	struct socket *so = arg;
	int s;

	s = splsoftnet();
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		/*
		 * We may not sleep here as sofree() and unsplice() may be
		 * called from softnet interrupt context.  This would remove
		 * the socket during somove().
		 */
		somove(so, M_DONTWAIT);
	}
	splx(s);

	/* Avoid user land starvation. */
	yield();
}
/*
 * Move data from receive buffer of spliced source socket to send
 * buffer of drain socket.  Try to move as much as possible in one
 * big chunk.  It is a TCP only implementation.
 * A return value of 0 means splicing has finished, 1 that it continues.
 */
int
somove(struct socket *so, int wait)
{
	struct socket *sosp = so->so_sp->ssp_socket;
	struct mbuf *m, **mp, *nextrecord;
	u_long len, off, oobmark;
	long space;
	int error = 0, maxreached = 0;
	short state;

	splsoftassert(IPL_SOFTNET);

nextpkt:
	if (so->so_error) {
		error = so->so_error;
		goto release;
	}
	if (sosp->so_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto release;
	}
	if (sosp->so_error && sosp->so_error != ETIMEDOUT &&
	    sosp->so_error != EFBIG && sosp->so_error != ELOOP) {
		error = sosp->so_error;
		goto release;
	}
	if ((sosp->so_state & SS_ISCONNECTED) == 0)
		goto release;

	/* Calculate how many bytes can be copied now. */
	len = so->so_rcv.sb_datacc;
	if (so->so_splicemax) {
		KASSERT(so->so_splicelen < so->so_splicemax);
		if (so->so_splicemax <= so->so_splicelen + len) {
			len = so->so_splicemax - so->so_splicelen;
			maxreached = 1;
		}
	}
	space = sbspace(&sosp->so_snd);
	if (so->so_oobmark && so->so_oobmark < len &&
	    so->so_oobmark < space + 1024)
		space += 1024;
	if (space <= 0) {
		maxreached = 0;
		goto release;
	}
	if (space < len) {
		maxreached = 0;
		if (space < sosp->so_snd.sb_lowat)
			goto release;
		len = space;
	}
	sosp->so_state |= SS_ISSENDING;

	SBLASTRECORDCHK(&so->so_rcv, "somove 1");
	SBLASTMBUFCHK(&so->so_rcv, "somove 1");
	m = so->so_rcv.sb_mb;
	if (m == NULL)
		goto release;
	nextrecord = m->m_nextpkt;

	/* Drop address and control information not used with splicing. */
	if (so->so_proto->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("somove soname: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
#endif
		m = m->m_next;
	}
	while (m && m->m_type == MT_CONTROL)
		m = m->m_next;
	if (m == NULL) {
		sbdroprecord(&so->so_rcv);
		if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb)
			(so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL,
			    NULL, NULL, NULL);
		goto nextpkt;
	}

	/*
	 * By splicing sockets connected to localhost, userland might create a
	 * loop.  Dissolve splicing with error if loop is detected by counter.
	 */
	if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) {
		error = ELOOP;
		goto release;
	}

	if (so->so_proto->pr_flags & PR_ATOMIC) {
		if ((m->m_flags & M_PKTHDR) == 0)
			panic("somove !PKTHDR: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
		if (sosp->so_snd.sb_hiwat < m->m_pkthdr.len) {
			error = EMSGSIZE;
			goto release;
		}
		if (len < m->m_pkthdr.len)
			goto release;
		if (m->m_pkthdr.len < len) {
			maxreached = 0;
			len = m->m_pkthdr.len;
		}
		/*
		 * Throw away the name mbuf after it has been assured
		 * that the whole first record can be processed.
		 */
		m = so->so_rcv.sb_mb;
		sbfree(&so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		sbsync(&so->so_rcv, nextrecord);
	}
	/*
	 * Throw away the control mbufs after it has been assured
	 * that the whole first record can be processed.
	 */
	m = so->so_rcv.sb_mb;
	while (m && m->m_type == MT_CONTROL) {
		sbfree(&so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		m = so->so_rcv.sb_mb;
		sbsync(&so->so_rcv, nextrecord);
	}

	SBLASTRECORDCHK(&so->so_rcv, "somove 2");
	SBLASTMBUFCHK(&so->so_rcv, "somove 2");

	/* Take at most len mbufs out of receive buffer. */
	for (off = 0, mp = &m; off <= len && *mp;
	    off += (*mp)->m_len, mp = &(*mp)->m_next) {
		u_long size = len - off;

#ifdef DIAGNOSTIC
		if ((*mp)->m_type != MT_DATA && (*mp)->m_type != MT_HEADER)
			panic("somove type: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, *mp, (*mp)->m_type);
#endif
		if ((*mp)->m_len > size) {
			if (!maxreached || (*mp = m_copym(
			    so->so_rcv.sb_mb, 0, size, wait)) == NULL) {
				len -= size;
				break;
			}
			so->so_rcv.sb_mb->m_data += size;
			so->so_rcv.sb_mb->m_len -= size;
			so->so_rcv.sb_cc -= size;
			so->so_rcv.sb_datacc -= size;
		} else {
			*mp = so->so_rcv.sb_mb;
			sbfree(&so->so_rcv, *mp);
			so->so_rcv.sb_mb = (*mp)->m_next;
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	*mp = NULL;

	SBLASTRECORDCHK(&so->so_rcv, "somove 3");
	SBLASTMBUFCHK(&so->so_rcv, "somove 3");
	SBCHECK(&so->so_rcv);
	if (m == NULL)
		goto release;
	m->m_nextpkt = NULL;
	if (m->m_flags & M_PKTHDR) {
		m_resethdr(m);
		m->m_pkthdr.len = len;
	}

	/* Send window update to source peer as receive buffer has changed. */
	if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb)
		(so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL,
		    NULL, NULL, NULL);

	/* Receive buffer did shrink by len bytes, adjust oob. */
	state = so->so_state;
	so->so_state &= ~SS_RCVATMARK;
	oobmark = so->so_oobmark;
	so->so_oobmark = oobmark > len ? oobmark - len : 0;
	if (oobmark) {
		if (oobmark == len)
			so->so_state |= SS_RCVATMARK;
		if (oobmark >= len)
			oobmark = 0;
	}

	/*
	 * Handle oob data.  If any malloc fails, ignore error.
	 * TCP urgent data is not very reliable anyway.
	 */
	while (((state & SS_RCVATMARK) || oobmark) &&
	    (so->so_options & SO_OOBINLINE)) {
		struct mbuf *o = NULL;

		if (state & SS_RCVATMARK) {
			o = m_get(wait, MT_DATA);
			state &= ~SS_RCVATMARK;
		} else if (oobmark) {
			o = m_split(m, oobmark, wait);
			if (o) {
				error = (*sosp->so_proto->pr_usrreq)(sosp,
				    PRU_SEND, m, NULL, NULL, NULL);
				if (error) {
					if (sosp->so_state & SS_CANTSENDMORE)
						error = EPIPE;
					m_freem(o);
					goto release;
				}
				len -= oobmark;
				so->so_splicelen += oobmark;
				m = o;
				o = m_get(wait, MT_DATA);
			}
			oobmark = 0;
		}
		if (o) {
			o->m_len = 1;
			*mtod(o, caddr_t) = *mtod(m, caddr_t);
			error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SENDOOB,
			    o, NULL, NULL, NULL);
			if (error) {
				if (sosp->so_state & SS_CANTSENDMORE)
					error = EPIPE;
				m_freem(m);
				goto release;
			}
			len -= 1;
			so->so_splicelen += 1;
			if (oobmark) {
				oobmark -= 1;
				if (oobmark == 0)
					state |= SS_RCVATMARK;
			}
			m_adj(m, 1);
		}
	}
	/* Append all remaining data to drain socket. */
	if (so->so_rcv.sb_cc == 0 || maxreached)
		sosp->so_state &= ~SS_ISSENDING;
	error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SEND, m, NULL, NULL,
	    NULL);
	if (error) {
		if (sosp->so_state & SS_CANTSENDMORE)
			error = EPIPE;
		goto release;
	}
	so->so_splicelen += len;

	/* Move several packets if possible. */
	if (!maxreached && nextrecord)
		goto nextpkt;

release:
	sosp->so_state &= ~SS_ISSENDING;
	if (!error && maxreached && so->so_splicemax == so->so_splicelen)
		error = EFBIG;
	if (error)
		so->so_error = error;
	if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) ||
	    (sosp->so_state & SS_CANTSENDMORE) || maxreached || error) {
		sounsplice(so, sosp, 1);
		return (0);
	}
	if (timerisset(&so->so_idletv))
		timeout_add_tv(&so->so_idleto, &so->so_idletv);
	return (1);
}

#endif /* SOCKET_SPLICE */

void
sorwakeup(struct socket *so)
{
	splsoftassert(IPL_SOFTNET);

#ifdef SOCKET_SPLICE
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		/*
		 * TCP has a sendbuffer that can handle multiple packets
		 * at once.  So queue the stream a bit to accumulate data.
		 * The sosplice thread will call somove() later and send
		 * the packets calling tcp_output() only once.
		 * In the UDP case, send out the packets immediately.
		 * Using a thread would make things slower.
		 */
		if (so->so_proto->pr_flags & PR_WANTRCVD)
			task_add(sosplice_taskq, &so->so_splicetask);
		else
			somove(so, M_DONTWAIT);
	}
	if (isspliced(so))
		return;
#endif
	sowakeup(so, &so->so_rcv);
	if (so->so_upcall)
		(*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
}

void
sowwakeup(struct socket *so)
{
	splsoftassert(IPL_SOFTNET);

#ifdef SOCKET_SPLICE
	if (so->so_snd.sb_flagsintr & SB_SPLICE)
		task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask);
#endif
	sowakeup(so, &so->so_snd);
}
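/*
 * Illustrative sketch (not compiled here; the function name is invented):
 * sosetopt() below only interprets SOL_SOCKET options itself; any other
 * level is handed to the protocol's pr_ctloutput, e.g. a TCP-level
 * option:
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

int
example_nodelay(int s)
{
	int on = 1;

	/* level != SOL_SOCKET -> (*pr_ctloutput)(PRCO_SETOPT, ...) */
	return (setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)));
}
#endif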
int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
{
	int error = 0;
	struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {
		case SO_BINDANY:
			if ((error = suser(curproc, 0)) != 0)	/* XXX */
				goto bad;
			break;
		}

		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* FALLTHROUGH */

		case SO_BINDANY:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_DONTROUTE:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				error = EOPNOTSUPP;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			cnt = *mtod(m, int *);
			if ((long)cnt <= 0)
				cnt = 1;
			switch (optname) {

			case SO_SNDBUF:
				if (so->so_state & SS_CANTSENDMORE) {
					error = EINVAL;
					goto bad;
				}
				if (sbcheckreserve(cnt, so->so_snd.sb_wat) ||
				    sbreserve(&so->so_snd, cnt)) {
					error = ENOBUFS;
					goto bad;
				}
				so->so_snd.sb_wat = cnt;
				break;

			case SO_RCVBUF:
				if (so->so_state & SS_CANTRCVMORE) {
					error = EINVAL;
					goto bad;
				}
				if (sbcheckreserve(cnt, so->so_rcv.sb_wat) ||
				    sbreserve(&so->so_rcv, cnt)) {
					error = ENOBUFS;
					goto bad;
				}
				so->so_rcv.sb_wat = cnt;
				break;

			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (cnt > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : cnt;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (cnt > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : cnt;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval tv;
			int val;

			if (m == NULL || m->m_len < sizeof (tv)) {
				error = EINVAL;
				goto bad;
			}
			memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
			val = tvtohz(&tv);
			if (val > USHRT_MAX) {
				error = EDOM;
				goto bad;
			}

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto && so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				struct domain *dom = so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				return ((*so->so_proto->pr_ctloutput)
				    (PRCO_SETOPT, so, level, optname, &m0));
			}
			error = ENOPROTOOPT;
			break;

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
			if (m == NULL) {
				error = sosplice(so, -1, 0, NULL);
			} else if (m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			} else if (m->m_len < sizeof(struct splice)) {
				error = sosplice(so, *mtod(m, int *), 0, NULL);
			} else {
				error = sosplice(so,
				    mtod(m, struct splice *)->sp_fd,
				    mtod(m, struct splice *)->sp_max,
				    &mtod(m, struct splice *)->sp_idle);
			}
			break;
#endif /* SOCKET_SPLICE */

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}
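/*
 * Illustrative sketch (not compiled here; the function name is invented):
 * socket timeouts are stored in ticks, so sosetopt() above converts the
 * timeval with tvtohz() and rejects values that do not fit in sb_timeo
 * with EDOM:
 */
#if 0
#include <sys/socket.h>
#include <sys/time.h>

int
example_rcvtimeo(int s)
{
	struct timeval tv = { 5, 0 };	/* five second receive timeout */

	return (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)));
}
#endif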
int
sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
{
	struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_BINDANY:
		case SO_USELOOPBACK:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_DONTROUTE:
			*mtod(m, int *) = 0;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval tv;
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			memset(&tv, 0, sizeof(tv));
			tv.tv_sec = val / hz;
			tv.tv_usec = (val % hz) * tick;
			memcpy(mtod(m, struct timeval *), &tv, sizeof tv);
			break;
		    }

		case SO_RTABLE:
			(void)m_free(m);
			if (so->so_proto && so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				struct domain *dom = so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				return ((*so->so_proto->pr_ctloutput)
				    (PRCO_GETOPT, so, level, optname, mp));
			}
			return (ENOPROTOOPT);
			break;

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
		    {
			off_t len;
			int s = splsoftnet();

			m->m_len = sizeof(off_t);
			len = so->so_sp ? so->so_sp->ssp_len : 0;
			memcpy(mtod(m, off_t *), &len, sizeof(off_t));
			splx(s);
			break;
		    }
#endif /* SOCKET_SPLICE */

		case SO_PEERCRED:
			if (so->so_proto->pr_protocol == AF_UNIX) {
				struct unpcb *unp = sotounpcb(so);

				if (unp->unp_flags & UNP_FEIDS) {
					m->m_len = sizeof(unp->unp_connid);
					memcpy(mtod(m, caddr_t),
					    &(unp->unp_connid), m->m_len);
					break;
				}
				(void)m_free(m);
				return (ENOTCONN);
			}
			(void)m_free(m);
			return (EOPNOTSUPP);
			break;

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}
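/*
 * Illustrative sketch (not compiled here; the function name is invented,
 * and the struct name is an assumption from the userland headers): the
 * SO_PEERCRED case above copies out the peer's credentials, recorded in
 * unp_connid at connect/bind time, for AF_UNIX sockets:
 */
#if 0
#include <sys/socket.h>

int
example_peercred(int s, struct sockpeercred *sc)
{
	socklen_t len = sizeof(*sc);

	return (getsockopt(s, SOL_SOCKET, SO_PEERCRED, sc, &len));
}
#endif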
void
sohasoutofband(struct socket *so)
{
	csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid);
	selwakeup(&so->so_rcv.sb_sel);
}

int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	struct sockbuf *sb;
	int s;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (EINVAL);
	}

	s = splnet();
	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;
	splx(s);
	return (0);
}

void
filt_sordetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
	splx(s);
}

int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;

	kn->kn_data = so->so_rcv.sb_cc;
#ifdef SOCKET_SPLICE
	if (isspliced(so))
		return (0);
#endif /* SOCKET_SPLICE */
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
	splx(s);
}

int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (so->so_qlen != 0);
}
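/*
 * Illustrative sketch (not compiled here; the function name is invented):
 * a kevent(2) EVFILT_READ registration on a listening socket attaches
 * solisten_filtops above, so the event fires when so_qlen, the completed
 * connection queue, becomes non-empty:
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

int
example_wait_accept(int kq, int ls)
{
	struct kevent kev;

	EV_SET(&kev, ls, EVFILT_READ, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		return (-1);
	/* on return, kev.data carries so_qlen from filt_solisten() */
	return (kevent(kq, NULL, 0, &kev, 1, NULL));
}
#endif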
#ifdef DDB
void
sobuf_print(struct sockbuf *,
    int (*)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))));

void
sobuf_print(struct sockbuf *sb,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	(*pr)("\tsb_cc: %lu\n", sb->sb_cc);
	(*pr)("\tsb_datacc: %lu\n", sb->sb_datacc);
	(*pr)("\tsb_hiwat: %lu\n", sb->sb_hiwat);
	(*pr)("\tsb_wat: %lu\n", sb->sb_wat);
	(*pr)("\tsb_mbcnt: %lu\n", sb->sb_mbcnt);
	(*pr)("\tsb_mbmax: %lu\n", sb->sb_mbmax);
	(*pr)("\tsb_lowat: %ld\n", sb->sb_lowat);
	(*pr)("\tsb_mb: %p\n", sb->sb_mb);
	(*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
	(*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
	(*pr)("\tsb_sel: ...\n");
	(*pr)("\tsb_flagsintr: %d\n", sb->sb_flagsintr);
	(*pr)("\tsb_flags: %i\n", sb->sb_flags);
	(*pr)("\tsb_timeo: %i\n", sb->sb_timeo);
}

void
so_print(void *v,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct socket *so = v;

	(*pr)("socket %p\n", so);
	(*pr)("so_type: %i\n", so->so_type);
	(*pr)("so_options: 0x%04x\n", so->so_options); /* %b */
	(*pr)("so_linger: %i\n", so->so_linger);
	(*pr)("so_state: %i\n", so->so_state);
	(*pr)("so_pcb: %p\n", so->so_pcb);
	(*pr)("so_proto: %p\n", so->so_proto);

	(*pr)("so_head: %p\n", so->so_head);
	(*pr)("so_onq: %p\n", so->so_onq);
	(*pr)("so_q0: @%p first: %p\n", &so->so_q0, TAILQ_FIRST(&so->so_q0));
	(*pr)("so_q: @%p first: %p\n", &so->so_q, TAILQ_FIRST(&so->so_q));
	(*pr)("so_eq: next: %p\n", TAILQ_NEXT(so, so_qe));
	(*pr)("so_q0len: %i\n", so->so_q0len);
	(*pr)("so_qlen: %i\n", so->so_qlen);
	(*pr)("so_qlimit: %i\n", so->so_qlimit);
	(*pr)("so_timeo: %i\n", so->so_timeo);
	(*pr)("so_pgid: %i\n", so->so_pgid);
	(*pr)("so_siguid: %i\n", so->so_siguid);
	(*pr)("so_sigeuid: %i\n", so->so_sigeuid);
	(*pr)("so_obmark: %lu\n", so->so_oobmark);

	(*pr)("so_sp: %p\n", so->so_sp);
	if (so->so_sp != NULL) {
		(*pr)("\tssp_socket: %p\n", so->so_sp->ssp_socket);
		(*pr)("\tssp_soback: %p\n", so->so_sp->ssp_soback);
		(*pr)("\tssp_len: %lld\n",
		    (unsigned long long)so->so_sp->ssp_len);
		(*pr)("\tssp_max: %lld\n",
		    (unsigned long long)so->so_sp->ssp_max);
		(*pr)("\tssp_idletv: %lld %ld\n", so->so_sp->ssp_idletv.tv_sec,
		    so->so_sp->ssp_idletv.tv_usec);
		(*pr)("\tssp_idleto: %spending (@%i)\n",
		    timeout_pending(&so->so_sp->ssp_idleto) ? "" : "not ",
		    so->so_sp->ssp_idleto.to_time);
	}

	(*pr)("so_rcv:\n");
	sobuf_print(&so->so_rcv, pr);
	(*pr)("so_snd:\n");
	sobuf_print(&so->so_snd, pr);

	(*pr)("so_upcall: %p so_upcallarg: %p\n",
	    so->so_upcall, so->so_upcallarg);

	(*pr)("so_euid: %d so_ruid: %d\n", so->so_euid, so->so_ruid);
	(*pr)("so_egid: %d so_rgid: %d\n", so->so_egid, so->so_rgid);
	(*pr)("so_cpid: %d\n", so->so_cpid);
}
#endif