/*	$OpenBSD: uipc_socket.c,v 1.171 2016/12/29 12:12:43 mpi Exp $	*/
/*	$NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/unpcb.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <net/if.h>
#include <sys/pool.h>

#ifdef DDB
#include <machine/db_machdep.h>
#endif

void	sbsync(struct sockbuf *, struct mbuf *);

int	sosplice(struct socket *, int, off_t, struct timeval *);
void	sounsplice(struct socket *, struct socket *, int);
void	soidle(void *);
void	sotask(void *);
int	somove(struct socket *, int);

void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_solisten(struct knote *kn, long hint);

struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };


#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

struct pool socket_pool;
#ifdef SOCKET_SPLICE
struct pool sosplice_pool;
struct taskq *sosplice_taskq;
#endif

void
soinit(void)
{
	pool_init(&socket_pool, sizeof(struct socket), 0, IPL_SOFTNET, 0,
	    "sockpl", NULL);
#ifdef SOCKET_SPLICE
	pool_init(&sosplice_pool, sizeof(struct sosplice), 0, IPL_SOFTNET, 0,
	    "sosppl", NULL);
#endif
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
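 *
 * A typical routine takes the net lock and dispatches through the
 * protocol switch, e.g. (this is sobind() below):
 *
 *	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam,
 *	    NULL, p);
 *
 * so a single set of entry points serves every protocol family.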
 */
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p = curproc;		/* XXX */
	struct protosw *prp;
	struct socket *so;
	int error, s;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == NULL || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	NET_LOCK(s);
	so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	if (suser(p, 0) == 0)
		so->so_state = SS_PRIV;
	so->so_ruid = p->p_ucred->cr_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_rgid = p->p_ucred->cr_rgid;
	so->so_egid = p->p_ucred->cr_gid;
	so->so_cpid = p->p_p->ps_pid;
	so->so_proto = prp;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
	    (struct mbuf *)(long)proto, NULL, p);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		NET_UNLOCK(s);
		return (error);
	}
	NET_UNLOCK(s);
	*aso = so;
	return (0);
}

int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	int s, error;

	NET_LOCK(s);
	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
	NET_UNLOCK(s);
	return (error);
}

int
solisten(struct socket *so, int backlog)
{
	int s, error;

	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
		return (EOPNOTSUPP);
#ifdef SOCKET_SPLICE
	if (isspliced(so) || issplicedback(so))
		return (EOPNOTSUPP);
#endif /* SOCKET_SPLICE */
	NET_LOCK(s);
	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
	    curproc);
	if (error) {
		NET_UNLOCK(s);
		return (error);
	}
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	if (backlog < sominconn)
		backlog = sominconn;
	so->so_qlimit = backlog;
	NET_UNLOCK(s);
	return (0);
}

void
sofree(struct socket *so)
{
	NET_ASSERT_LOCKED();

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
#ifdef SOCKET_SPLICE
	if (so->so_sp) {
		if (issplicedback(so))
			sounsplice(so->so_sp->ssp_soback, so,
			    so->so_sp->ssp_soback != so);
		if (isspliced(so))
			sounsplice(so, so->so_sp->ssp_socket, 0);
		pool_put(&sosplice_pool, so->so_sp);
		so->so_sp = NULL;
	}
#endif /* SOCKET_SPLICE */
	sbrelease(&so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
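 * If SO_LINGER is set, block (for at most so_linger seconds) until
 * the disconnect has drained before detaching the protocol.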
 */
int
soclose(struct socket *so)
{
	struct socket *so2;
	int s, error = 0;

	NET_LOCK(s);
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep(&so->so_timeo,
				    PSOCK | PCATCH, "netcls",
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, NULL,
		    NULL, NULL, curproc);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state |= SS_NOFDREF;
	sofree(so);
	NET_UNLOCK(s);
	return (error);
}

int
soabort(struct socket *so)
{
	NET_ASSERT_LOCKED();

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL,
	    curproc);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int error = 0;

	NET_ASSERT_LOCKED();

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL,
		    nam, NULL, curproc);
	else
		error = ECONNABORTED;
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	int s, error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	NET_LOCK(s);
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    NULL, nam, NULL, curproc);
	NET_UNLOCK(s);
	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int s, error;

	NET_LOCK(s);
	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
	    (struct mbuf *)so2, NULL, curproc);
	NET_UNLOCK(s);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int error;

	NET_ASSERT_LOCKED();

	if ((so->so_state & SS_ISCONNECTED) == 0)
		return (ENOTCONN);
	if (so->so_state & SS_ISDISCONNECTING)
		return (EALREADY);
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
	    NULL, curproc);
	return (error);
}

int m_getuio(struct mbuf **, int, long, struct uio *);

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
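 * (For example, an atomic record larger than the send buffer's
 * sb_hiwat draws an immediate EMSGSIZE.)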
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
	long space, clen = 0;
	size_t resid;
	int error, s;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/* MSG_EOR on a SOCK_STREAM socket is invalid. */
	if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		error = EINVAL;
		goto out;
	}
	if (uio && uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgsnd++;
	if (control) {
		/*
		 * In theory clen should be unsigned (since control->m_len is).
		 * However, space must be signed, as it might be less than 0
		 * if we over-committed, and we must use a signed comparison
		 * of space and clen.
		 */
		clen = control->m_len;
		/* reserve extra space for AF_LOCAL's internalize */
		if (so->so_proto->pr_domain->dom_family == AF_LOCAL &&
		    clen >= CMSG_ALIGN(sizeof(struct cmsghdr)) &&
		    mtod(control, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
			clen = CMSG_SPACE(
			    (clen - CMSG_ALIGN(sizeof(struct cmsghdr))) *
			    (sizeof(struct file *) / sizeof(int)));
	}

#define	snderr(errno)	{ error = errno; NET_UNLOCK(s); goto release; }

restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags), NULL)) != 0)
		goto out;
	so->so_state |= SS_ISSENDING;
	do {
		NET_LOCK(s);
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			snderr(error);
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if (!(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    (so->so_proto->pr_domain->dom_family != AF_LOCAL &&
		    clen > so->so_snd.sb_hiwat))
			snderr(EMSGSIZE);
		if (space < clen ||
		    (space - clen < resid &&
		    (atomic || space < so->so_snd.sb_lowat))) {
			if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT))
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			so->so_state &= ~SS_ISSENDING;
			NET_UNLOCK(s);
			if (error)
				goto out;
			goto restart;
		}
		NET_UNLOCK(s);
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				error = m_getuio(&top, atomic,
				    space, uio);
				if (error)
					goto release;
				space -= top->m_pkthdr.len;
				resid = uio->uio_resid;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			}
			NET_LOCK(s);
			if (resid == 0)
				so->so_state &= ~SS_ISSENDING;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, curproc);
			NET_UNLOCK(s);
			clen = 0;
			control = NULL;
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	so->so_state &= ~SS_ISSENDING;
	sbunlock(&so->so_snd);
out:
	m_freem(top);
	m_freem(control);
	return (error);
}

int
m_getuio(struct mbuf **mp, int atomic, long space, struct uio *uio)
{
	struct mbuf *m, *top = NULL;
	struct mbuf **nextp = &top;
	u_long len, mlen;
	size_t resid = uio->uio_resid;
	int error;

	do {
		if (top == NULL) {
			MGETHDR(m, M_WAIT, MT_DATA);
			mlen = MHLEN;
			m->m_pkthdr.len = 0;
			m->m_pkthdr.ph_ifidx = 0;
		} else {
			MGET(m, M_WAIT, MT_DATA);
			mlen = MLEN;
		}
		/* chain mbuf together */
		*nextp = m;
		nextp = &m->m_next;

		resid = ulmin(resid, space);
		if (resid >= MINCLSIZE) {
			MCLGETI(m, M_NOWAIT, NULL, ulmin(resid, MAXMCLBYTES));
			if ((m->m_flags & M_EXT) == 0)
				MCLGETI(m, M_NOWAIT, NULL, MCLBYTES);
			if ((m->m_flags & M_EXT) == 0)
				goto nopages;
			mlen = m->m_ext.ext_size;
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && top == NULL && len < mlen - max_hdr)
				m->m_data += max_hdr;
		} else {
nopages:
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && top == NULL && len < mlen - max_hdr)
				MH_ALIGN(m, len);
		}

		error = uiomove(mtod(m, caddr_t), len, uio);
		if (error) {
			m_freem(top);
			return (error);
		}

		/* adjust counters */
		resid = uio->uio_resid;
		space -= len;
		m->m_len = len;
		top->m_pkthdr.len += len;

		/* Is there more space and more data? */
	} while (space > 0 && resid > 0);

	*mp = top;
	return 0;
}

/*
 * Following replacement or removal of the first mbuf on the first
 * mbuf chain of a socket buffer, push necessary state changes back
 * into the socket buffer so that other consumers see the values
 * consistently.  'nextrecord' is the caller's locally stored value of
 * the original value of sb->sb_mb->m_nextpkt which must be restored
 * when the lead mbuf changes.  NOTE: 'nextrecord' may be NULL.
 */
void
sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
{

	/*
	 * First, update for the new value of nextrecord.  If necessary,
	 * make it the first record.
	 */
	if (sb->sb_mb != NULL)
		sb->sb_mb->m_nextpkt = nextrecord;
	else
		sb->sb_mb = nextrecord;

	/*
	 * Now update any dependent socket buffer fields to reflect
	 * the new state.  This is an inline of SB_EMPTY_FIXUP, with
	 * the addition of a second clause that takes care of the
	 * case where sb_mb has been updated, but remains the last
	 * record.
	 */
	if (sb->sb_mb == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (sb->sb_mb->m_nextpkt == NULL)
		sb->sb_lastrecord = sb->sb_mb;
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
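 *
 * A complete record may thus look like:
 *
 *	MT_SONAME -> MT_CONTROL -> MT_DATA -> MT_DATA -> ...
 *
 * with the address and control mbufs being optional.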
 * In order to avoid blocking network for the entire time here, we splx()
 * and release NET_LOCK() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp,
    socklen_t controllen)
{
	struct mbuf *m, **mp;
	struct mbuf *cm;
	u_long len, offset, moff;
	int flags, error, s, type, uio_error = 0;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	size_t resid, orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (so->so_state & SS_NBIO)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		NET_LOCK(s);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), NULL, curproc);
		NET_UNLOCK(s);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    ulmin(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		m_freem(m);
		return (error);
	}
	if (mp)
		*mp = NULL;

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags), NULL)) != 0)
		return (error);
	NET_LOCK(s);

	m = so->so_rcv.sb_mb;
#ifdef SOCKET_SPLICE
	if (isspliced(so))
		m = NULL;
#endif /* SOCKET_SPLICE */
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
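	 *
	 * A spliced source socket never hands data to userland here; its
	 * receive buffer is drained by somove() instead, which is why m
	 * is forced to NULL above when isspliced(so).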
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == NULL && so->so_rcv.sb_cc)
#ifdef SOCKET_SPLICE
		    if (!isspliced(so))
#endif /* SOCKET_SPLICE */
			panic("receive 1: so %p, so_type %d, sb_cc %lu",
			    so, so->so_type, so->so_rcv.sb_cc);
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else if (so->so_rcv.sb_cc == 0)
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		NET_UNLOCK(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * From this point onward, we maintain 'nextrecord' as a cache of the
	 * pointer to the next record in the socket buffer.  We must keep the
	 * various socket buffer pointers and local stack versions of the
	 * pointers in sync, pushing out modifications before operations that
	 * may sleep, and re-reading them afterwards.
	 *
	 * Otherwise, we will race with the network stack appending new data
	 * or records onto the socket buffer by using inconsistent/stale
	 * versions of the field, possibly resulting in socket buffer
	 * corruption.
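	 *
	 * The pattern used below is therefore: cache m->m_nextpkt in
	 * 'nextrecord', modify the socket buffer, then call sbsync() to
	 * push the cached value back into so_rcv.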
	 */
	if (uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgrcv++;
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				so->so_rcv.sb_mb = m_free(m);
				m = so->so_rcv.sb_mb;
			}
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			so->so_rcv.sb_mb = m->m_next;
			m->m_nextpkt = m->m_next = NULL;
			cm = m;
			m = so->so_rcv.sb_mb;
			sbsync(&so->so_rcv, nextrecord);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(cm, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(cm,
					    controllen, flags);
				*controlp = cm;
			} else {
				/*
				 * Dispose of any SCM_RIGHTS message that went
				 * through the read path rather than recv.
				 */
				if (pr->pr_domain->dom_dispose &&
				    mtod(cm, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
					pr->pr_domain->dom_dispose(cm);
				m_free(cm);
			}
		}
		if (m != NULL)
			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
		else
			nextrecord = so->so_rcv.sb_mb;
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}

	/* If m is non-NULL, we have some data to read. */
	if (m) {
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
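		 *
		 * The uiomove() below runs with the netlock released, so
		 * uio_error is only acted upon after the lock has been
		 * reacquired.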
		 */
		if (mp == NULL && uio_error == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			resid = uio->uio_resid;
			NET_UNLOCK(s);
			uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
			NET_LOCK(s);
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = NULL;
				} else {
					so->so_rcv.sb_mb = m_free(m);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
				so->so_rcv.sb_datacc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				NET_UNLOCK(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
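			 * Compare sbsync() above, which maintains the
			 * same invariants.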
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, NULL,
			    (struct mbuf *)(long)flags, NULL, curproc);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		NET_UNLOCK(s);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	NET_UNLOCK(s);
	return (error);
}

int
soshutdown(struct socket *so, int how)
{
	struct protosw *pr = so->so_proto;
	int s, error = 0;

	NET_LOCK(s);
	switch (how) {
	case SHUT_RD:
	case SHUT_RDWR:
		sorflush(so);
		if (how == SHUT_RD)
			break;
		/* FALLTHROUGH */
	case SHUT_WR:
		error = (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL,
		    curproc);
		break;
	default:
		error = EINVAL;
		break;
	}
	NET_UNLOCK(s);

	return (error);
}

void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK, NULL);
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	memset(sb, 0, sizeof (*sb));
	/* XXX - the memset stomps all over so_rcv */
	if (asb.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

#ifdef SOCKET_SPLICE

#define so_splicelen	so_sp->ssp_len
#define so_splicemax	so_sp->ssp_max
#define so_idletv	so_sp->ssp_idletv
#define so_idleto	so_sp->ssp_idleto
#define so_splicetask	so_sp->ssp_task

int
sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
{
	struct file *fp;
	struct socket *sosp;
	int s, error = 0;

	if (sosplice_taskq == NULL)
		sosplice_taskq = taskq_create("sosplice", 1, IPL_SOFTNET,
		    TASKQ_CANTSLEEP);
	if (sosplice_taskq == NULL)
		return (ENOMEM);

	if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
		return (EPROTONOSUPPORT);
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (ENOTCONN);
	if (so->so_sp == NULL)
		so->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);

	/* If no fd is given, unsplice by removing existing link. */
	if (fd < 0) {
		/* Lock receive buffer. */
		if ((error = sblock(&so->so_rcv,
		    (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK, NULL)) != 0)
			return (error);
		NET_LOCK(s);
		if (so->so_sp->ssp_socket)
			sounsplice(so, so->so_sp->ssp_socket, 1);
		NET_UNLOCK(s);
		sbunlock(&so->so_rcv);
		return (0);
	}

	if (max && max < 0)
		return (EINVAL);

	if (tv && (tv->tv_sec < 0 || tv->tv_usec < 0))
		return (EINVAL);

	/* Find sosp, the drain socket where data will be spliced into. */
	if ((error = getsock(curproc, fd, &fp)) != 0)
		return (error);
	sosp = fp->f_data;
	if (sosp->so_sp == NULL)
		sosp->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);

	/* Lock both receive and send buffer. */
	if ((error = sblock(&so->so_rcv,
	    (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK, NULL)) != 0) {
		FRELE(fp, curproc);
		return (error);
	}
	if ((error = sblock(&sosp->so_snd, M_WAITOK, NULL)) != 0) {
		sbunlock(&so->so_rcv);
		FRELE(fp, curproc);
		return (error);
	}
	NET_LOCK(s);

	if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
		error = EBUSY;
		goto release;
	}
	if (sosp->so_proto->pr_usrreq != so->so_proto->pr_usrreq) {
		error = EPROTONOSUPPORT;
		goto release;
	}
	if (sosp->so_options & SO_ACCEPTCONN) {
		error = EOPNOTSUPP;
		goto release;
	}
	if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) {
		error = ENOTCONN;
		goto release;
	}

	/* Splice so and sosp together. */
	so->so_sp->ssp_socket = sosp;
	sosp->so_sp->ssp_soback = so;
	so->so_splicelen = 0;
	so->so_splicemax = max;
	if (tv)
		so->so_idletv = *tv;
	else
		timerclear(&so->so_idletv);
	timeout_set(&so->so_idleto, soidle, so);
	task_set(&so->so_splicetask, sotask, so);

	/*
	 * To prevent softnet interrupt from calling somove() while
	 * we sleep, the socket buffers are not marked as spliced yet.
	 */
	if (somove(so, M_WAIT)) {
		so->so_rcv.sb_flagsintr |= SB_SPLICE;
		sosp->so_snd.sb_flagsintr |= SB_SPLICE;
	}

release:
	NET_UNLOCK(s);
	sbunlock(&sosp->so_snd);
	sbunlock(&so->so_rcv);
	FRELE(fp, curproc);
	return (error);
}

void
sounsplice(struct socket *so, struct socket *sosp, int wakeup)
{
	NET_ASSERT_LOCKED();

	task_del(sosplice_taskq, &so->so_splicetask);
	timeout_del(&so->so_idleto);
	sosp->so_snd.sb_flagsintr &= ~SB_SPLICE;
	so->so_rcv.sb_flagsintr &= ~SB_SPLICE;
	so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
	if (wakeup && soreadable(so))
		sorwakeup(so);
}

void
soidle(void *arg)
{
	struct socket *so = arg;
	int s;

	NET_LOCK(s);
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		so->so_error = ETIMEDOUT;
		sounsplice(so, so->so_sp->ssp_socket, 1);
	}
	NET_UNLOCK(s);
}

void
sotask(void *arg)
{
	struct socket *so = arg;
	int s;

	NET_LOCK(s);
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		/*
		 * We may not sleep here as sofree() and unsplice() may be
		 * called from softnet interrupt context.  This would remove
		 * the socket during somove().
		 */
		somove(so, M_DONTWAIT);
	}
	NET_UNLOCK(s);

	/* Avoid user land starvation. */
	yield();
}

/*
 * Move data from receive buffer of spliced source socket to send
 * buffer of drain socket.  Try to move as much as possible in one
 * big chunk.  Although written with TCP in mind, datagram (PR_ATOMIC)
 * sockets are handled as well.
 * A return value of 0 means splicing has finished, 1 means continue.
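 * When it returns 0 the sockets have already been unspliced by
 * sounsplice() on the release path.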
 */
int
somove(struct socket *so, int wait)
{
	struct socket *sosp = so->so_sp->ssp_socket;
	struct mbuf *m, **mp, *nextrecord;
	u_long len, off, oobmark;
	long space;
	int error = 0, maxreached = 0;
	short state;

	NET_ASSERT_LOCKED();

nextpkt:
	if (so->so_error) {
		error = so->so_error;
		goto release;
	}
	if (sosp->so_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto release;
	}
	if (sosp->so_error && sosp->so_error != ETIMEDOUT &&
	    sosp->so_error != EFBIG && sosp->so_error != ELOOP) {
		error = sosp->so_error;
		goto release;
	}
	if ((sosp->so_state & SS_ISCONNECTED) == 0)
		goto release;

	/* Calculate how many bytes can be copied now. */
	len = so->so_rcv.sb_datacc;
	if (so->so_splicemax) {
		KASSERT(so->so_splicelen < so->so_splicemax);
		if (so->so_splicemax <= so->so_splicelen + len) {
			len = so->so_splicemax - so->so_splicelen;
			maxreached = 1;
		}
	}
	space = sbspace(&sosp->so_snd);
	if (so->so_oobmark && so->so_oobmark < len &&
	    so->so_oobmark < space + 1024)
		space += 1024;
	if (space <= 0) {
		maxreached = 0;
		goto release;
	}
	if (space < len) {
		maxreached = 0;
		if (space < sosp->so_snd.sb_lowat)
			goto release;
		len = space;
	}
	sosp->so_state |= SS_ISSENDING;

	SBLASTRECORDCHK(&so->so_rcv, "somove 1");
	SBLASTMBUFCHK(&so->so_rcv, "somove 1");
	m = so->so_rcv.sb_mb;
	if (m == NULL)
		goto release;
	nextrecord = m->m_nextpkt;

	/* Drop address and control information not used with splicing. */
	if (so->so_proto->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("somove soname: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
#endif
		m = m->m_next;
	}
	while (m && m->m_type == MT_CONTROL)
		m = m->m_next;
	if (m == NULL) {
		sbdroprecord(&so->so_rcv);
		if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb)
			(so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL,
			    NULL, NULL, NULL);
		goto nextpkt;
	}

	/*
	 * By splicing sockets connected to localhost, userland might create a
	 * loop.  Dissolve splicing with error if loop is detected by counter.
	 */
	if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) {
		error = ELOOP;
		goto release;
	}

	if (so->so_proto->pr_flags & PR_ATOMIC) {
		if ((m->m_flags & M_PKTHDR) == 0)
			panic("somove !PKTHDR: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
		if (sosp->so_snd.sb_hiwat < m->m_pkthdr.len) {
			error = EMSGSIZE;
			goto release;
		}
		if (len < m->m_pkthdr.len)
			goto release;
		if (m->m_pkthdr.len < len) {
			maxreached = 0;
			len = m->m_pkthdr.len;
		}
		/*
		 * Throw away the name mbuf after it has been assured
		 * that the whole first record can be processed.
		 */
		m = so->so_rcv.sb_mb;
		sbfree(&so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		sbsync(&so->so_rcv, nextrecord);
	}
	/*
	 * Throw away the control mbufs after it has been assured
	 * that the whole first record can be processed.
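	 * sbsync() runs after every m_free() so that sb_mb, sb_mbtail and
	 * sb_lastrecord stay consistent while the head of the record
	 * changes.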
1344 */ 1345 m = so->so_rcv.sb_mb; 1346 while (m && m->m_type == MT_CONTROL) { 1347 sbfree(&so->so_rcv, m); 1348 so->so_rcv.sb_mb = m_free(m); 1349 m = so->so_rcv.sb_mb; 1350 sbsync(&so->so_rcv, nextrecord); 1351 } 1352 1353 SBLASTRECORDCHK(&so->so_rcv, "somove 2"); 1354 SBLASTMBUFCHK(&so->so_rcv, "somove 2"); 1355 1356 /* Take at most len mbufs out of receive buffer. */ 1357 for (off = 0, mp = &m; off <= len && *mp; 1358 off += (*mp)->m_len, mp = &(*mp)->m_next) { 1359 u_long size = len - off; 1360 1361 #ifdef DIAGNOSTIC 1362 if ((*mp)->m_type != MT_DATA && (*mp)->m_type != MT_HEADER) 1363 panic("somove type: so %p, so_type %d, m %p, " 1364 "m_type %d", so, so->so_type, *mp, (*mp)->m_type); 1365 #endif 1366 if ((*mp)->m_len > size) { 1367 /* 1368 * Move only a partial mbuf at maximum splice length or 1369 * if the drain buffer is too small for this large mbuf. 1370 */ 1371 if (!maxreached && so->so_snd.sb_datacc > 0) { 1372 len -= size; 1373 break; 1374 } 1375 *mp = m_copym(so->so_rcv.sb_mb, 0, size, wait); 1376 if (*mp == NULL) { 1377 len -= size; 1378 break; 1379 } 1380 so->so_rcv.sb_mb->m_data += size; 1381 so->so_rcv.sb_mb->m_len -= size; 1382 so->so_rcv.sb_cc -= size; 1383 so->so_rcv.sb_datacc -= size; 1384 } else { 1385 *mp = so->so_rcv.sb_mb; 1386 sbfree(&so->so_rcv, *mp); 1387 so->so_rcv.sb_mb = (*mp)->m_next; 1388 sbsync(&so->so_rcv, nextrecord); 1389 } 1390 } 1391 *mp = NULL; 1392 1393 SBLASTRECORDCHK(&so->so_rcv, "somove 3"); 1394 SBLASTMBUFCHK(&so->so_rcv, "somove 3"); 1395 SBCHECK(&so->so_rcv); 1396 if (m == NULL) 1397 goto release; 1398 m->m_nextpkt = NULL; 1399 if (m->m_flags & M_PKTHDR) { 1400 m_resethdr(m); 1401 m->m_pkthdr.len = len; 1402 } 1403 1404 /* Send window update to source peer as receive buffer has changed. */ 1405 if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb) 1406 (so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL, 1407 NULL, NULL, NULL); 1408 1409 /* Receive buffer did shrink by len bytes, adjust oob. */ 1410 state = so->so_state; 1411 so->so_state &= ~SS_RCVATMARK; 1412 oobmark = so->so_oobmark; 1413 so->so_oobmark = oobmark > len ? oobmark - len : 0; 1414 if (oobmark) { 1415 if (oobmark == len) 1416 so->so_state |= SS_RCVATMARK; 1417 if (oobmark >= len) 1418 oobmark = 0; 1419 } 1420 1421 /* 1422 * Handle oob data. If any malloc fails, ignore error. 1423 * TCP urgent data is not very reliable anyway. 1424 */ 1425 while (((state & SS_RCVATMARK) || oobmark) && 1426 (so->so_options & SO_OOBINLINE)) { 1427 struct mbuf *o = NULL; 1428 1429 if (state & SS_RCVATMARK) { 1430 o = m_get(wait, MT_DATA); 1431 state &= ~SS_RCVATMARK; 1432 } else if (oobmark) { 1433 o = m_split(m, oobmark, wait); 1434 if (o) { 1435 error = (*sosp->so_proto->pr_usrreq)(sosp, 1436 PRU_SEND, m, NULL, NULL, NULL); 1437 if (error) { 1438 if (sosp->so_state & SS_CANTSENDMORE) 1439 error = EPIPE; 1440 m_freem(o); 1441 goto release; 1442 } 1443 len -= oobmark; 1444 so->so_splicelen += oobmark; 1445 m = o; 1446 o = m_get(wait, MT_DATA); 1447 } 1448 oobmark = 0; 1449 } 1450 if (o) { 1451 o->m_len = 1; 1452 *mtod(o, caddr_t) = *mtod(m, caddr_t); 1453 error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SENDOOB, 1454 o, NULL, NULL, NULL); 1455 if (error) { 1456 if (sosp->so_state & SS_CANTSENDMORE) 1457 error = EPIPE; 1458 m_freem(m); 1459 goto release; 1460 } 1461 len -= 1; 1462 so->so_splicelen += 1; 1463 if (oobmark) { 1464 oobmark -= 1; 1465 if (oobmark == 0) 1466 state |= SS_RCVATMARK; 1467 } 1468 m_adj(m, 1); 1469 } 1470 } 1471 1472 /* Append all remaining data to drain socket. 
	if (so->so_rcv.sb_cc == 0 || maxreached)
		sosp->so_state &= ~SS_ISSENDING;
	error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SEND, m, NULL, NULL,
	    NULL);
	if (error) {
		if (sosp->so_state & SS_CANTSENDMORE)
			error = EPIPE;
		goto release;
	}
	so->so_splicelen += len;

	/* Move several packets if possible. */
	if (!maxreached && nextrecord)
		goto nextpkt;

release:
	sosp->so_state &= ~SS_ISSENDING;
	if (!error && maxreached && so->so_splicemax == so->so_splicelen)
		error = EFBIG;
	if (error)
		so->so_error = error;
	if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) ||
	    (sosp->so_state & SS_CANTSENDMORE) || maxreached || error) {
		sounsplice(so, sosp, 1);
		return (0);
	}
	if (timerisset(&so->so_idletv))
		timeout_add_tv(&so->so_idleto, &so->so_idletv);
	return (1);
}

#endif /* SOCKET_SPLICE */

void
sorwakeup(struct socket *so)
{
	NET_ASSERT_LOCKED();

#ifdef SOCKET_SPLICE
	if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
		/*
		 * TCP has a sendbuffer that can handle multiple packets
		 * at once.  So queue the stream a bit to accumulate data.
		 * The sosplice thread will call somove() later and send
		 * the packets calling tcp_output() only once.
		 * In the UDP case, send out the packets immediately.
		 * Using a thread would make things slower.
		 */
		if (so->so_proto->pr_flags & PR_WANTRCVD)
			task_add(sosplice_taskq, &so->so_splicetask);
		else
			somove(so, M_DONTWAIT);
	}
	if (isspliced(so))
		return;
#endif
	sowakeup(so, &so->so_rcv);
	if (so->so_upcall) {
		(*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
	}
}

void
sowwakeup(struct socket *so)
{
	NET_ASSERT_LOCKED();

#ifdef SOCKET_SPLICE
	if (so->so_snd.sb_flagsintr & SB_SPLICE)
		task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask);
#endif
	sowakeup(so, &so->so_snd);
}

int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
{
	int s, error = 0;
	struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			NET_LOCK(s);
			error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
			    level, optname, &m0);
			NET_UNLOCK(s);
			return (error);
		}
		error = ENOPROTOOPT;
	} else {
		switch (optname) {
		case SO_BINDANY:
			if ((error = suser(curproc, 0)) != 0)	/* XXX */
				goto bad;
			break;
		}

		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* FALLTHROUGH */

		case SO_BINDANY:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_DONTROUTE:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				error = EOPNOTSUPP;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			cnt = *mtod(m, int *);
			if ((long)cnt <= 0)
				cnt = 1;
			switch (optname) {

			case SO_SNDBUF:
				if (so->so_state & SS_CANTSENDMORE) {
					error = EINVAL;
					goto bad;
				}
				if (sbcheckreserve(cnt, so->so_snd.sb_wat) ||
				    sbreserve(&so->so_snd, cnt)) {
					error = ENOBUFS;
					goto bad;
				}
				so->so_snd.sb_wat = cnt;
				break;

			case SO_RCVBUF:
				if (so->so_state & SS_CANTRCVMORE) {
					error = EINVAL;
					goto bad;
				}
				if (sbcheckreserve(cnt, so->so_rcv.sb_wat) ||
				    sbreserve(&so->so_rcv, cnt)) {
					error = ENOBUFS;
					goto bad;
				}
				so->so_rcv.sb_wat = cnt;
				break;

			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (cnt > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : cnt;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (cnt > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : cnt;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval tv;
			int val;

			if (m == NULL || m->m_len < sizeof (tv)) {
				error = EINVAL;
				goto bad;
			}
			memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
			val = tvtohz(&tv);
			if (val > USHRT_MAX) {
				error = EDOM;
				goto bad;
			}

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto && so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				struct domain *dom = so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				NET_LOCK(s);
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_SETOPT, so, level, optname, &m0);
				NET_UNLOCK(s);
				return (error);
			}
			error = ENOPROTOOPT;
			break;

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
			if (m == NULL) {
				error = sosplice(so, -1, 0, NULL);
			} else if (m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			} else if (m->m_len < sizeof(struct splice)) {
				error = sosplice(so, *mtod(m, int *), 0, NULL);
			} else {
				error = sosplice(so,
				    mtod(m, struct splice *)->sp_fd,
				    mtod(m, struct splice *)->sp_max,
				    &mtod(m, struct splice *)->sp_idle);
			}
			break;
#endif /* SOCKET_SPLICE */

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			NET_LOCK(s);
			(*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
			    level, optname, &m0);
			NET_UNLOCK(s);
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

int
sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
{
	int s, error = 0;
	struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			NET_LOCK(s);
			error = (*so->so_proto->pr_ctloutput)(PRCO_GETOPT, so,
			    level, optname, mp);
			NET_UNLOCK(s);
			return (error);
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);

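		/*
		 * Most socket-level options report a plain int; the cases
		 * below override m_len where a larger object (struct
		 * linger, struct timeval, off_t, ...) is returned instead.
		 */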
		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_BINDANY:
		case SO_USELOOPBACK:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_DONTROUTE:
			*mtod(m, int *) = 0;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval tv;
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			memset(&tv, 0, sizeof(tv));
			tv.tv_sec = val / hz;
			tv.tv_usec = (val % hz) * tick;
			memcpy(mtod(m, struct timeval *), &tv, sizeof tv);
			break;
		    }

		case SO_RTABLE:
			(void)m_free(m);
			if (so->so_proto && so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				struct domain *dom = so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				NET_LOCK(s);
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_GETOPT, so, level, optname, mp);
				NET_UNLOCK(s);
				return (error);
			}
			return (ENOPROTOOPT);
			break;

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
		    {
			off_t len;
			int s = splsoftnet();

			m->m_len = sizeof(off_t);
			len = so->so_sp ? so->so_sp->ssp_len : 0;
			memcpy(mtod(m, off_t *), &len, sizeof(off_t));
			splx(s);
			break;
		    }
#endif /* SOCKET_SPLICE */

		case SO_PEERCRED:
			if (so->so_proto->pr_protocol == AF_UNIX) {
				struct unpcb *unp = sotounpcb(so);

				if (unp->unp_flags & UNP_FEIDS) {
					m->m_len = sizeof(unp->unp_connid);
					memcpy(mtod(m, caddr_t),
					    &(unp->unp_connid), m->m_len);
					break;
				}
				(void)m_free(m);
				return (ENOTCONN);
			}
			(void)m_free(m);
			return (EOPNOTSUPP);
			break;

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

void
sohasoutofband(struct socket *so)
{
	csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid);
	selwakeup(&so->so_rcv.sb_sel);
}

int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	struct sockbuf *sb;

	KERNEL_ASSERT_LOCKED();

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (EINVAL);
	}

	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;

	return (0);
}

void
filt_sordetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	KERNEL_ASSERT_LOCKED();

	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
}

int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;

	kn->kn_data = so->so_rcv.sb_cc;
#ifdef SOCKET_SPLICE
	if (isspliced(so))
		return (0);
#endif /* SOCKET_SPLICE */
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	KERNEL_ASSERT_LOCKED();

	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
}

int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (so->so_qlen != 0);
}

#ifdef DDB
void
sobuf_print(struct sockbuf *,
    int (*)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))));

void
sobuf_print(struct sockbuf *sb,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	(*pr)("\tsb_cc: %lu\n", sb->sb_cc);
	(*pr)("\tsb_datacc: %lu\n", sb->sb_datacc);
	(*pr)("\tsb_hiwat: %lu\n", sb->sb_hiwat);
	(*pr)("\tsb_wat: %lu\n", sb->sb_wat);
	(*pr)("\tsb_mbcnt: %lu\n", sb->sb_mbcnt);
	(*pr)("\tsb_mbmax: %lu\n", sb->sb_mbmax);
	(*pr)("\tsb_lowat: %ld\n", sb->sb_lowat);
	(*pr)("\tsb_mb: %p\n", sb->sb_mb);
	(*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
	(*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
	(*pr)("\tsb_sel: ...\n");
	(*pr)("\tsb_flagsintr: %d\n", sb->sb_flagsintr);
	(*pr)("\tsb_flags: %i\n", sb->sb_flags);
	(*pr)("\tsb_timeo: %i\n", sb->sb_timeo);
}

void
so_print(void *v,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct socket *so = v;

	(*pr)("socket %p\n", so);
	(*pr)("so_type: %i\n", so->so_type);
	(*pr)("so_options: 0x%04x\n", so->so_options); /* %b */
	(*pr)("so_linger: %i\n", so->so_linger);
	(*pr)("so_state: %i\n", so->so_state);
	(*pr)("so_pcb: %p\n", so->so_pcb);
	(*pr)("so_proto: %p\n", so->so_proto);

	(*pr)("so_head: %p\n", so->so_head);
	(*pr)("so_onq: %p\n", so->so_onq);
	(*pr)("so_q0: @%p first: %p\n", &so->so_q0, TAILQ_FIRST(&so->so_q0));
	(*pr)("so_q: @%p first: %p\n", &so->so_q, TAILQ_FIRST(&so->so_q));
	(*pr)("so_eq: next: %p\n", TAILQ_NEXT(so, so_qe));
	(*pr)("so_q0len: %i\n", so->so_q0len);
	(*pr)("so_qlen: %i\n", so->so_qlen);
	(*pr)("so_qlimit: %i\n", so->so_qlimit);
	(*pr)("so_timeo: %i\n", so->so_timeo);
	(*pr)("so_pgid: %i\n", so->so_pgid);
	(*pr)("so_siguid: %i\n", so->so_siguid);
	(*pr)("so_sigeuid: %i\n", so->so_sigeuid);
	(*pr)("so_oobmark: %lu\n", so->so_oobmark);

	(*pr)("so_sp: %p\n", so->so_sp);
	if (so->so_sp != NULL) {
		(*pr)("\tssp_socket: %p\n", so->so_sp->ssp_socket);
		(*pr)("\tssp_soback: %p\n", so->so_sp->ssp_soback);
		(*pr)("\tssp_len: %lld\n",
		    (unsigned long long)so->so_sp->ssp_len);
		(*pr)("\tssp_max: %lld\n",
		    (unsigned long long)so->so_sp->ssp_max);
		(*pr)("\tssp_idletv: %lld %ld\n", so->so_sp->ssp_idletv.tv_sec,
		    so->so_sp->ssp_idletv.tv_usec);
		(*pr)("\tssp_idleto: %spending (@%i)\n",
		    timeout_pending(&so->so_sp->ssp_idleto) ? "" : "not ",
		    so->so_sp->ssp_idleto.to_time);
	}

	(*pr)("so_rcv:\n");
	sobuf_print(&so->so_rcv, pr);
	(*pr)("so_snd:\n");
	sobuf_print(&so->so_snd, pr);

	(*pr)("so_upcall: %p so_upcallarg: %p\n",
	    so->so_upcall, so->so_upcallarg);

	(*pr)("so_euid: %d so_ruid: %d\n", so->so_euid, so->so_ruid);
	(*pr)("so_egid: %d so_rgid: %d\n", so->so_egid, so->so_rgid);
	(*pr)("so_cpid: %d\n", so->so_cpid);
}
#endif
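
/*
 * Illustrative userland sketch (not kernel code): how a process drives
 * the SO_SPLICE machinery handled by sosetopt()/sosplice() above.
 * "from" and "to" are assumed to be connected, mutually compatible
 * sockets:
 *
 *	struct splice sp;
 *
 *	memset(&sp, 0, sizeof(sp));
 *	sp.sp_fd = to;		(drain socket)
 *	sp.sp_max = 0;		(0 means no byte limit)
 *	(sp.sp_idle stays zero: no idle timeout)
 *	if (setsockopt(from, SOL_SOCKET, SO_SPLICE, &sp, sizeof(sp)) == -1)
 *		err(1, "SO_SPLICE");
 *
 * Passing a plain int fd instead of a struct splice is also accepted,
 * and an fd of -1 dissolves an existing splice.
 */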