/*	$OpenBSD: uipc_socket.c,v 1.346 2024/12/15 11:00:05 dlg Exp $	*/
/*	$NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/unpcb.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/pool.h>
#include <sys/atomic.h>
#include <sys/rwlock.h>
#include <sys/time.h>
#include <sys/refcnt.h>

#ifdef DDB
#include <machine/db_machdep.h>
#endif

void	sbsync(struct sockbuf *, struct mbuf *);

int	sosplice(struct socket *, int, off_t, struct timeval *);
void	sounsplice(struct socket *, struct socket *, int);
void	soidle(void *);
void	sotask(void *);
void	soreaper(void *);
void	soput(void *);
int	somove(struct socket *, int);
void	sorflush(struct socket *);

void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_soexcept(struct knote *kn, long hint);

int	filt_sowmodify(struct kevent *kev, struct knote *kn);
int	filt_sowprocess(struct knote *kn, struct kevent *kev);

int	filt_sormodify(struct kevent *kev, struct knote *kn);
int	filt_sorprocess(struct knote *kn, struct kevent *kev);

const struct filterops soread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_sordetach,
	.f_event	= filt_soread,
	.f_modify	= filt_sormodify,
	.f_process	= filt_sorprocess,
};

const struct filterops sowrite_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_sowdetach,
	.f_event	= filt_sowrite,
	.f_modify	= filt_sowmodify,
	.f_process	= filt_sowprocess,
};

const struct filterops soexcept_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_sordetach,
	.f_event	= filt_soexcept,
	.f_modify	= filt_sormodify,
	.f_process	= filt_sorprocess,
};

#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

struct pool socket_pool;
#ifdef SOCKET_SPLICE
struct pool sosplice_pool;
struct taskq *sosplice_taskq;
struct rwlock sosplice_lock = RWLOCK_INITIALIZER("sosplicelk");
#endif

void
soinit(void)
{
	pool_init(&socket_pool, sizeof(struct socket), 0, IPL_SOFTNET, 0,
	    "sockpl", NULL);
#ifdef SOCKET_SPLICE
	pool_init(&sosplice_pool, sizeof(struct sosplice), 0, IPL_SOFTNET, 0,
	    "sosppl", NULL);
#endif
}

struct socket *
soalloc(const struct protosw *prp, int wait)
{
	const struct domain *dp = prp->pr_domain;
	struct socket *so;

	so = pool_get(&socket_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
	    PR_ZERO);
	if (so == NULL)
		return (NULL);
	rw_init_flags(&so->so_lock, dp->dom_name, RWL_DUPOK);
	refcnt_init(&so->so_refcnt);
	rw_init(&so->so_rcv.sb_lock, "sbufrcv");
	rw_init(&so->so_snd.sb_lock, "sbufsnd");
	mtx_init_flags(&so->so_rcv.sb_mtx, IPL_MPFLOOR, "sbrcv", 0);
	mtx_init_flags(&so->so_snd.sb_mtx, IPL_MPFLOOR, "sbsnd", 0);
	klist_init_mutex(&so->so_rcv.sb_klist, &so->so_rcv.sb_mtx);
	klist_init_mutex(&so->so_snd.sb_klist, &so->so_snd.sb_mtx);
	sigio_init(&so->so_sigio);
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);

	switch (dp->dom_family) {
	case AF_INET:
	case AF_INET6:
		switch (prp->pr_type) {
		case SOCK_RAW:
		case SOCK_DGRAM:
			so->so_snd.sb_flags |= SB_MTXLOCK;
			so->so_rcv.sb_flags |= SB_MTXLOCK;
			break;
		}
		break;
	case AF_KEY:
	case AF_ROUTE:
	case AF_UNIX:
	case AF_FRAME:
		so->so_snd.sb_flags |= SB_MTXLOCK;
		so->so_rcv.sb_flags |= SB_MTXLOCK;
		break;
	}

	return (so);
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p = curproc;		/* XXX */
	const struct protosw *prp;
	struct socket *so;
	int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == NULL || prp->pr_usrreqs == NULL)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(prp, M_WAIT);
	so->so_type = type;
	if (suser(p) == 0)
		so->so_state = SS_PRIV;
	so->so_ruid = p->p_ucred->cr_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_rgid = p->p_ucred->cr_rgid;
	so->so_egid = p->p_ucred->cr_gid;
	so->so_cpid = p->p_p->ps_pid;
	so->so_proto = prp;
	so->so_snd.sb_timeo_nsecs = INFSLP;
	so->so_rcv.sb_timeo_nsecs = INFSLP;

	solock(so);
	error = pru_attach(so, proto, M_WAIT);
	if (error) {
		so->so_state |= SS_NOFDREF;
		/* sofree() calls sounlock(). */
		sofree(so, 0);
		return (error);
	}
	sounlock(so);
	*aso = so;
	return (0);
}
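
/*
 * Illustrative userland sketch (not part of this file, names assumed):
 * socreate() above is the kernel side of a plain socket(2) call.
 */
#if 0
	int s = socket(AF_INET, SOCK_STREAM, 0);	/* ends up in socreate() */
	if (s == -1)
		err(1, "socket");
#endif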

int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	soassertlocked(so);
	return pru_bind(so, nam, p);
}

int
solisten(struct socket *so, int backlog)
{
	int somaxconn_local = atomic_load_int(&somaxconn);
	int sominconn_local = atomic_load_int(&sominconn);
	int error;

	switch (so->so_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		break;
	default:
		return (EOPNOTSUPP);
	}

	soassertlocked(so);

	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
		return (EINVAL);
#ifdef SOCKET_SPLICE
	if (isspliced(so) || issplicedback(so))
		return (EOPNOTSUPP);
#endif /* SOCKET_SPLICE */
	error = pru_listen(so);
	if (error)
		return (error);
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn_local)
		backlog = somaxconn_local;
	if (backlog < sominconn_local)
		backlog = sominconn_local;
	so->so_qlimit = backlog;
	return (0);
}

#define SOSP_FREEING_READ	1
#define SOSP_FREEING_WRITE	2
void
sofree(struct socket *so, int keep_lock)
{
	int persocket = solock_persocket(so);

	soassertlocked(so);

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
		if (!keep_lock)
			sounlock(so);
		return;
	}
	if (so->so_head) {
		struct socket *head = so->so_head;

		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (so->so_onq == &head->so_q) {
			if (!keep_lock)
				sounlock(so);
			return;
		}

		if (persocket) {
			/*
			 * Concurrent close of `head' could
			 * abort `so' due to re-lock.
			 */
			soref(so);
			soref(head);
			sounlock(so);
			solock(head);
			solock(so);

			if (so->so_onq != &head->so_q0) {
				sounlock(head);
				sounlock(so);
				sorele(head);
				sorele(so);
				return;
			}

			sorele(head);
			sorele(so);
		}

		soqremque(so, 0);

		if (persocket)
			sounlock(head);
	}

	switch (so->so_proto->pr_domain->dom_family) {
	case AF_INET:
	case AF_INET6:
		if (so->so_proto->pr_type == SOCK_STREAM)
			break;
		/* FALLTHROUGH */
	default:
		sounlock(so);
		refcnt_finalize(&so->so_refcnt, "sofinal");
		solock(so);
		break;
	}

	sigio_free(&so->so_sigio);
	klist_free(&so->so_rcv.sb_klist);
	klist_free(&so->so_snd.sb_klist);

	mtx_enter(&so->so_snd.sb_mtx);
	sbrelease(so, &so->so_snd);
	mtx_leave(&so->so_snd.sb_mtx);

	/*
	 * Unlocked dispose and cleanup are safe.  The socket is unlinked
	 * from everywhere; even a concurrent sotask() thread will not
	 * call somove().
	 */
	if (so->so_proto->pr_flags & PR_RIGHTS &&
	    so->so_proto->pr_domain->dom_dispose)
		(*so->so_proto->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
	m_purge(so->so_rcv.sb_mb);

	if (!keep_lock)
		sounlock(so);

#ifdef SOCKET_SPLICE
	if (so->so_sp) {
		/* Reuse splice idle, sounsplice() has been called before. */
		timeout_set_flags(&so->so_sp->ssp_idleto, soreaper, so,
		    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
		timeout_add(&so->so_sp->ssp_idleto, 0);
	} else
#endif /* SOCKET_SPLICE */
	{
		pool_put(&socket_pool, so);
	}
}
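
/*
 * Illustrative userland sketch (not part of this file): with SO_LINGER
 * set, the soclose() path below may sleep in sosleep_nsec() for up to
 * l_linger seconds; solinger_nsec() converts that value to the timeout.
 */
#if 0
	struct linger l = { .l_onoff = 1, .l_linger = 5 };
	if (setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) == -1)
		err(1, "setsockopt");
	close(s);	/* may block up to 5 seconds in soclose() */
#endif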

static inline uint64_t
solinger_nsec(struct socket *so)
{
	if (so->so_linger == 0)
		return INFSLP;

	return SEC_TO_NSEC(so->so_linger);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so, int flags)
{
	struct socket *so2;
	int error = 0;

	solock(so);
	/* Revoke async IO early. There is a final revocation in sofree(). */
	sigio_free(&so->so_sigio);
	if (so->so_state & SS_ISCONNECTED) {
		if (so->so_pcb == NULL)
			goto discard;
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (flags & MSG_DONTWAIT))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = sosleep_nsec(so, &so->so_timeo,
				    PSOCK | PCATCH, "netcls",
				    solinger_nsec(so));
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2;
		error2 = pru_detach(so);
		if (error == 0)
			error = error2;
	}
	if (so->so_options & SO_ACCEPTCONN) {
		int persocket = solock_persocket(so);

		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			if (persocket)
				solock(so2);
			(void) soqremque(so2, 0);
			if (persocket)
				sounlock(so);
			soabort(so2);
			if (persocket)
				solock(so);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			if (persocket)
				solock(so2);
			(void) soqremque(so2, 1);
			if (persocket)
				sounlock(so);
			soabort(so2);
			if (persocket)
				solock(so);
		}
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state |= SS_NOFDREF;

#ifdef SOCKET_SPLICE
	if (so->so_sp) {
		struct socket *soback;

		if (so->so_proto->pr_flags & PR_WANTRCVD) {
			/*
			 * Copied and pasted code, but we can't relock and
			 * sleep in sofree() in the tcp(4) case.  That's why
			 * tcp(4) still relies on solock() for splicing and
			 * unsplicing.
			 */

			if (issplicedback(so)) {
				int freeing = SOSP_FREEING_WRITE;

				if (so->so_sp->ssp_soback == so)
					freeing |= SOSP_FREEING_READ;
				sounsplice(so->so_sp->ssp_soback, so, freeing);
			}
			if (isspliced(so)) {
				int freeing = SOSP_FREEING_READ;

				if (so == so->so_sp->ssp_socket)
					freeing |= SOSP_FREEING_WRITE;
				sounsplice(so, so->so_sp->ssp_socket, freeing);
			}
			goto free;
		}

		sounlock(so);
		mtx_enter(&so->so_snd.sb_mtx);
		/*
		 * Concurrent sounsplice() locks `sb_mtx' mutexes on
		 * both `so_snd' and `so_rcv' before unsplicing sockets.
		 */
		if ((soback = so->so_sp->ssp_soback) == NULL) {
			mtx_leave(&so->so_snd.sb_mtx);
			goto notsplicedback;
		}
		soref(soback);
		mtx_leave(&so->so_snd.sb_mtx);

		/*
		 * `so' can only be unspliced, and never spliced again.
		 * Thus if the issplicedback(so) check is positive, the
		 * socket is still spliced and `ssp_soback' points to the
		 * same socket as `soback'.
		 */
		sblock(&soback->so_rcv, SBL_WAIT | SBL_NOINTR);
		if (issplicedback(so)) {
			int freeing = SOSP_FREEING_WRITE;

			if (so->so_sp->ssp_soback == so)
				freeing |= SOSP_FREEING_READ;
			solock(soback);
			sounsplice(so->so_sp->ssp_soback, so, freeing);
			sounlock(soback);
		}
		sbunlock(&soback->so_rcv);
		sorele(soback);

notsplicedback:
		sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
		if (isspliced(so)) {
			int freeing = SOSP_FREEING_READ;

			if (so == so->so_sp->ssp_socket)
				freeing |= SOSP_FREEING_WRITE;
			solock(so);
			sounsplice(so, so->so_sp->ssp_socket, freeing);
			sounlock(so);
		}
		sbunlock(&so->so_rcv);

		solock(so);
	}
 free:
#endif /* SOCKET_SPLICE */
	/* sofree() calls sounlock(). */
	sofree(so, 0);
	return (error);
}

void
soabort(struct socket *so)
{
	soassertlocked(so);
	pru_abort(so);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int error = 0;

	soassertlocked(so);

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = pru_accept(so, nam);
	else
		error = ECONNABORTED;
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	int error;

	soassertlocked(so);

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = pru_connect(so, nam);
	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int persocket, error;

	if ((persocket = solock_persocket(so1)))
		solock_pair(so1, so2);
	else
		solock(so1);

	error = pru_connect2(so1, so2);

	if (persocket)
		sounlock(so2);
	sounlock(so1);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int error;

	soassertlocked(so);

	if ((so->so_state & SS_ISCONNECTED) == 0)
		return (ENOTCONN);
	if (so->so_state & SS_ISDISCONNECTING)
		return (EALREADY);
	error = pru_disconnect(so);
	return (error);
}

int m_getuio(struct mbuf **, int, long, struct uio *);

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
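
/*
 * Illustrative userland sketch (not part of this file): the sosend()
 * contract below is why callers of send(2) on a stream socket must be
 * prepared for short counts after a signal or timeout.
 */
#if 0
	ssize_t n = send(s, buf, buflen, 0);
	if (n >= 0 && (size_t)n < buflen)
		;	/* short count: sosend() was interrupted part way */
#endif
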
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
	long space, clen = 0;
	size_t resid;
	int error;
	int atomic = sosendallatonce(so) || top;
	int dosolock = ((so->so_snd.sb_flags & SB_MTXLOCK) == 0);

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/* MSG_EOR on a SOCK_STREAM socket is invalid. */
	if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		m_freem(top);
		m_freem(control);
		return (EINVAL);
	}
	if (uio && uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgsnd++;
	if (control) {
		/*
		 * In theory clen should be unsigned (since control->m_len is).
		 * However, space must be signed, as it might be less than 0
		 * if we over-committed, and we must use a signed comparison
		 * of space and clen.
		 */
		clen = control->m_len;
		/* reserve extra space for AF_UNIX's internalize */
		if (so->so_proto->pr_domain->dom_family == AF_UNIX &&
		    clen >= CMSG_ALIGN(sizeof(struct cmsghdr)) &&
		    mtod(control, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
			clen = CMSG_SPACE(
			    (clen - CMSG_ALIGN(sizeof(struct cmsghdr))) *
			    (sizeof(struct fdpass) / sizeof(int)));
	}

#define	snderr(errno)	{ error = errno; goto release; }

restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	if (dosolock)
		solock_shared(so);
	sb_mtx_lock(&so->so_snd);
	so->so_snd.sb_state |= SS_ISSENDING;
	do {
		if (so->so_snd.sb_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if ((error = READ_ONCE(so->so_error))) {
			so->so_error = 0;
			snderr(error);
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if (!(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == NULL)
				snderr(EDESTADDRREQ);
		}
		space = sbspace_locked(so, &so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if (so->so_proto->pr_domain->dom_family == AF_UNIX) {
			if (atomic && resid > so->so_snd.sb_hiwat)
				snderr(EMSGSIZE);
		} else {
			if (clen > so->so_snd.sb_hiwat ||
			    (atomic && resid > so->so_snd.sb_hiwat - clen))
				snderr(EMSGSIZE);
		}
		if (space < clen ||
		    (space - clen < resid &&
		    (atomic || space < so->so_snd.sb_lowat))) {
			if (flags & MSG_DONTWAIT)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(so, &so->so_snd);
			so->so_snd.sb_state &= ~SS_ISSENDING;
			sb_mtx_unlock(&so->so_snd);
			if (dosolock)
				sounlock_shared(so);
			if (error)
				goto out;
			goto restart;
		}
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				sb_mtx_unlock(&so->so_snd);
				if (dosolock)
					sounlock_shared(so);
				error = m_getuio(&top, atomic, space, uio);
				if (dosolock)
					solock_shared(so);
				sb_mtx_lock(&so->so_snd);
				if (error)
					goto release;
				space -= top->m_pkthdr.len;
				resid = uio->uio_resid;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			}
			if (resid == 0)
				so->so_snd.sb_state &= ~SS_ISSENDING;
			if (top && so->so_options & SO_ZEROIZE)
				top->m_flags |= M_ZEROIZE;
			sb_mtx_unlock(&so->so_snd);
			if (!dosolock)
				solock_shared(so);
			if (flags & MSG_OOB)
				error = pru_sendoob(so, top, addr, control);
			else
				error = pru_send(so, top, addr, control);
			if (!dosolock)
				sounlock_shared(so);
			sb_mtx_lock(&so->so_snd);
			clen = 0;
			control = NULL;
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	so->so_snd.sb_state &= ~SS_ISSENDING;
	sb_mtx_unlock(&so->so_snd);
	if (dosolock)
		sounlock_shared(so);
	sbunlock(&so->so_snd);
out:
	m_freem(top);
	m_freem(control);
	return (error);
}

int
m_getuio(struct mbuf **mp, int atomic, long space, struct uio *uio)
{
	struct mbuf *m, *top = NULL;
	struct mbuf **nextp = &top;
	u_long len, mlen;
	size_t resid = uio->uio_resid;
	int error;

	do {
		if (top == NULL) {
			MGETHDR(m, M_WAIT, MT_DATA);
			mlen = MHLEN;
		} else {
			MGET(m, M_WAIT, MT_DATA);
			mlen = MLEN;
		}
		/* chain mbufs together */
		*nextp = m;
		nextp = &m->m_next;

		resid = ulmin(resid, space);
		if (resid >= MINCLSIZE) {
			MCLGETL(m, M_NOWAIT, ulmin(resid, MAXMCLBYTES));
			if ((m->m_flags & M_EXT) == 0)
				MCLGETL(m, M_NOWAIT, MCLBYTES);
			if ((m->m_flags & M_EXT) == 0)
				goto nopages;
			mlen = m->m_ext.ext_size;
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && m == top && len < mlen - max_hdr)
				m->m_data += max_hdr;
		} else {
nopages:
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && m == top && len < mlen - max_hdr)
				m_align(m, len);
		}

		error = uiomove(mtod(m, caddr_t), len, uio);
		if (error) {
			m_freem(top);
			return (error);
		}

		/* adjust counters */
		resid = uio->uio_resid;
		space -= len;
		m->m_len = len;
		top->m_pkthdr.len += len;

		/* Is there more space and more data? */
	} while (space > 0 && resid > 0);

	*mp = top;
	return 0;
}
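
/*
 * A minimal sketch of the invariant maintained by m_getuio() above,
 * using a hypothetical helper that is not part of this file: the data
 * is spread over m_len-sized segments linked through m_next, and the
 * head's m_pkthdr.len accumulates the total.
 */
#if 0
static size_t
mchain_bytes(struct mbuf *m)
{
	size_t bytes = 0;

	for (; m != NULL; m = m->m_next)
		bytes += m->m_len;
	return (bytes);		/* equals top->m_pkthdr.len after m_getuio() */
}
#endif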

/*
 * Following replacement or removal of the first mbuf on the first
 * mbuf chain of a socket buffer, push necessary state changes back
 * into the socket buffer so that other consumers see the values
 * consistently.  'nextrecord' is the caller's locally stored value of
 * the original value of sb->sb_mb->m_nextpkt which must be restored
 * when the lead mbuf changes.  NOTE: 'nextrecord' may be NULL.
 */
void
sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
{

	/*
	 * First, update for the new value of nextrecord.  If necessary,
	 * make it the first record.
	 */
	if (sb->sb_mb != NULL)
		sb->sb_mb->m_nextpkt = nextrecord;
	else
		sb->sb_mb = nextrecord;

	/*
	 * Now update any dependent socket buffer fields to reflect
	 * the new state.  This is an inline of SB_EMPTY_FIXUP, with
	 * the addition of a second clause that takes care of the
	 * case where sb_mb has been updated, but remains the last
	 * record.
	 */
	if (sb->sb_mb == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (sb->sb_mb->m_nextpkt == NULL)
		sb->sb_lastrecord = sb->sb_mb;
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking the network stack for the entire time here,
 * we release the solock() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp,
    socklen_t controllen)
{
	struct mbuf *m, **mp;
	struct mbuf *cm;
	u_long len, offset, moff;
	int flags, error, error2, type, uio_error = 0;
	const struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	size_t resid, orig_resid = uio->uio_resid;
	int dosolock = ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0);

	mp = mp0;
	if (paddr)
		*paddr = NULL;
	if (controlp)
		*controlp = NULL;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		solock(so);
		error = pru_rcvoob(so, m, flags & MSG_PEEK);
		sounlock(so);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    ulmin(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		m_freem(m);
		return (error);
	}
	if (mp)
		*mp = NULL;

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	if (dosolock)
		solock_shared(so);
	sb_mtx_lock(&so->so_rcv);

	m = so->so_rcv.sb_mb;
#ifdef SOCKET_SPLICE
	if (isspliced(so))
		m = NULL;
#endif /* SOCKET_SPLICE */
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == NULL && so->so_rcv.sb_cc)
#ifdef SOCKET_SPLICE
			if (!isspliced(so))
#endif /* SOCKET_SPLICE */
				panic("receive 1: so %p, so_type %d, sb_cc %lu",
				    so, so->so_type, so->so_rcv.sb_cc);
#endif
		if ((error2 = READ_ONCE(so->so_error))) {
			if (m)
				goto dontblock;
			error = error2;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else if (so->so_rcv.sb_cc == 0)
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if (flags & MSG_DONTWAIT) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");

		sbunlock(&so->so_rcv);
		error = sbwait(so, &so->so_rcv);
		sb_mtx_unlock(&so->so_rcv);
		if (dosolock)
			sounlock_shared(so);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * From this point onward, we maintain 'nextrecord' as a cache of the
	 * pointer to the next record in the socket buffer.  We must keep the
	 * various socket buffer pointers and local stack versions of the
	 * pointers in sync, pushing out modifications before operations that
	 * may sleep, and re-reading them afterwards.
	 *
	 * Otherwise, we will race with the network stack appending new data
	 * or records onto the socket buffer by using inconsistent/stale
	 * versions of the field, possibly resulting in socket buffer
	 * corruption.
	 */
	if (uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgrcv++;
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(so, &so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = NULL;
				m = so->so_rcv.sb_mb;
			} else {
				so->so_rcv.sb_mb = m_free(m);
				m = so->so_rcv.sb_mb;
			}
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		int skip = 0;
		if (flags & MSG_PEEK) {
			if (mtod(m, struct cmsghdr *)->cmsg_type ==
			    SCM_RIGHTS) {
				/* don't leak internalized SCM_RIGHTS msgs */
				skip = 1;
			} else if (controlp)
				*controlp = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(so, &so->so_rcv, m);
			so->so_rcv.sb_mb = m->m_next;
			m->m_nextpkt = m->m_next = NULL;
			cm = m;
			m = so->so_rcv.sb_mb;
			sbsync(&so->so_rcv, nextrecord);
			if (controlp) {
				if (pr->pr_domain->dom_externalize) {
					sb_mtx_unlock(&so->so_rcv);
					if (dosolock)
						sounlock_shared(so);
					error =
					    (*pr->pr_domain->dom_externalize)
					    (cm, controllen, flags);
					if (dosolock)
						solock_shared(so);
					sb_mtx_lock(&so->so_rcv);
				}
				*controlp = cm;
			} else {
				/*
				 * Dispose of any SCM_RIGHTS message that went
				 * through the read path rather than recv.
				 */
				if (pr->pr_domain->dom_dispose) {
					sb_mtx_unlock(&so->so_rcv);
					pr->pr_domain->dom_dispose(cm);
					sb_mtx_lock(&so->so_rcv);
				}
				m_free(cm);
			}
		}
		if (m != NULL)
			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
		else
			nextrecord = so->so_rcv.sb_mb;
		if (controlp && !skip)
			controlp = &(*controlp)->m_next;
		orig_resid = 0;
	}

	/* If m is non-NULL, we have some data to read. */
	if (m) {
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA) {
			break;
		} else if (m->m_type == MT_CONTROL) {
			/*
			 * If there is more than one control message in the
			 * stream, we do a short read.  The next one can be
			 * received or disposed of by another system call.
			 */
			break;
#ifdef DIAGNOSTIC
		} else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) {
			panic("receive 3: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		}
		so->so_rcv.sb_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == NULL && uio_error == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			resid = uio->uio_resid;
			sb_mtx_unlock(&so->so_rcv);
			if (dosolock)
				sounlock_shared(so);
			uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
			if (dosolock)
				solock_shared(so);
			sb_mtx_lock(&so->so_rcv);
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
				orig_resid = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(so, &so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = NULL;
				} else {
					so->so_rcv.sb_mb = m_free(m);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK) {
				moff += len;
				orig_resid = 0;
			} else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
				so->so_rcv.sb_datacc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_rcv.sb_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_rcv.sb_state & SS_CANTRCVMORE ||
			    so->so_error)
				break;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			if (sbwait(so, &so->so_rcv)) {
				sb_mtx_unlock(&so->so_rcv);
				if (dosolock)
					sounlock_shared(so);
				sbunlock(&so->so_rcv);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(so, &so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD) {
			sb_mtx_unlock(&so->so_rcv);
			if (!dosolock)
				solock_shared(so);
			pru_rcvd(so);
			if (!dosolock)
				sounlock_shared(so);
			sb_mtx_lock(&so->so_rcv);
		}
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 &&
	    (so->so_rcv.sb_state & SS_CANTRCVMORE) == 0) {
		sb_mtx_unlock(&so->so_rcv);
		sbunlock(&so->so_rcv);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sb_mtx_unlock(&so->so_rcv);
	if (dosolock)
		sounlock_shared(so);
	sbunlock(&so->so_rcv);
	return (error);
}

int
soshutdown(struct socket *so, int how)
{
	int error = 0;

	switch (how) {
	case SHUT_RD:
		sorflush(so);
		break;
	case SHUT_RDWR:
		sorflush(so);
		/* FALLTHROUGH */
	case SHUT_WR:
		solock(so);
		error = pru_shutdown(so);
		sounlock(so);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct mbuf *m;
	const struct protosw *pr = so->so_proto;
	int error;

	error = sblock(sb, SBL_WAIT | SBL_NOINTR);
	/* with SBL_WAIT and SBL_NOINTR sblock() must not fail */
	KASSERT(error == 0);

	solock_shared(so);
	socantrcvmore(so);
	mtx_enter(&sb->sb_mtx);
	m = sb->sb_mb;
	memset(&sb->sb_startzero, 0,
	    (caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero);
	sb->sb_timeo_nsecs = INFSLP;
	mtx_leave(&sb->sb_mtx);
	sounlock_shared(so);
	sbunlock(sb);

	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(m);
	m_purge(m);
}

#ifdef SOCKET_SPLICE

#define so_splicelen	so_sp->ssp_len
#define so_splicemax	so_sp->ssp_max
#define so_idletv	so_sp->ssp_idletv
#define so_idleto	so_sp->ssp_idleto
#define so_splicetask	so_sp->ssp_task

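/*
 * Illustrative userland sketch (not part of this file): sosplice()
 * below backs the SO_SPLICE socket option.  Splice all data arriving
 * on `from' into `to', with no byte limit and a 30 second idle
 * timeout; passing a negative sp_fd (or a plain int of -1) dissolves
 * an existing splice.
 */
#if 0
	struct splice sp = {
		.sp_fd = to,				/* drain socket */
		.sp_max = 0,				/* no byte limit */
		.sp_idle = { .tv_sec = 30, .tv_usec = 0 },
	};
	if (setsockopt(from, SOL_SOCKET, SO_SPLICE, &sp, sizeof(sp)) == -1)
		err(1, "setsockopt");
#endif
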
int
sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
{
	struct file *fp;
	struct socket *sosp;
	struct taskq *tq;
	int error = 0;

	if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
		return (EPROTONOSUPPORT);
	if (max && max < 0)
		return (EINVAL);
	if (tv && (tv->tv_sec < 0 || !timerisvalid(tv)))
		return (EINVAL);

	/* If no fd is given, unsplice by removing existing link. */
	if (fd < 0) {
		if ((error = sblock(&so->so_rcv, SBL_WAIT)) != 0)
			return (error);
		solock(so);
		if (so->so_options & SO_ACCEPTCONN) {
			error = EOPNOTSUPP;
			goto out;
		}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto out;
		}

		if (so->so_sp && so->so_sp->ssp_socket)
			sounsplice(so, so->so_sp->ssp_socket, 0);
 out:
		sounlock(so);
		sbunlock(&so->so_rcv);
		return (error);
	}

	if (sosplice_taskq == NULL) {
		rw_enter_write(&sosplice_lock);
		if (sosplice_taskq == NULL) {
			tq = taskq_create("sosplice", 1, IPL_SOFTNET,
			    TASKQ_MPSAFE);
			if (tq == NULL) {
				rw_exit_write(&sosplice_lock);
				return (ENOMEM);
			}
			/* Ensure the taskq is fully visible to other CPUs. */
			membar_producer();
			sosplice_taskq = tq;
		}
		rw_exit_write(&sosplice_lock);
	} else {
		/* Ensure the taskq is fully visible on this CPU. */
		membar_consumer();
	}

	/* Find sosp, the drain socket into which data will be spliced. */
	if ((error = getsock(curproc, fd, &fp)) != 0)
		return (error);
	sosp = fp->f_data;

	if (sosp->so_proto->pr_usrreqs->pru_send !=
	    so->so_proto->pr_usrreqs->pru_send) {
		error = EPROTONOSUPPORT;
		goto frele;
	}

	if ((error = sblock(&so->so_rcv, SBL_WAIT)) != 0)
		goto frele;
	if ((error = sblock(&sosp->so_snd, SBL_WAIT)) != 0) {
		sbunlock(&so->so_rcv);
		goto frele;
	}
	solock(so);

	if ((so->so_options & SO_ACCEPTCONN) ||
	    (sosp->so_options & SO_ACCEPTCONN)) {
		error = EOPNOTSUPP;
		goto release;
	}
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
		error = ENOTCONN;
		goto release;
	}
	if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) {
		error = ENOTCONN;
		goto release;
	}
	if (so->so_sp == NULL)
		so->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
	if (sosp->so_sp == NULL)
		sosp->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
	if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
		error = EBUSY;
		goto release;
	}

	so->so_splicelen = 0;
	so->so_splicemax = max;
	if (tv)
		so->so_idletv = *tv;
	else
		timerclear(&so->so_idletv);
	timeout_set_flags(&so->so_idleto, soidle, so,
	    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
	task_set(&so->so_splicetask, sotask, so);

	/*
	 * To prevent sorwakeup() from calling somove() before this
	 * somove() has finished, the socket buffers are not marked as
	 * spliced yet.
	 */

	/* Splice so and sosp together. */
	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);
	so->so_sp->ssp_socket = sosp;
	sosp->so_sp->ssp_soback = so;
	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0)
		sounlock(so);
	if (somove(so, M_WAIT)) {
		mtx_enter(&so->so_rcv.sb_mtx);
		mtx_enter(&sosp->so_snd.sb_mtx);
		so->so_rcv.sb_flags |= SB_SPLICE;
		sosp->so_snd.sb_flags |= SB_SPLICE;
		mtx_leave(&sosp->so_snd.sb_mtx);
		mtx_leave(&so->so_rcv.sb_mtx);
	}
	if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0)
		solock(so);

 release:
	sounlock(so);
	sbunlock(&sosp->so_snd);
	sbunlock(&so->so_rcv);
 frele:
	FRELE(fp, curproc);

	return (error);
}

void
sounsplice(struct socket *so, struct socket *sosp, int freeing)
{
	if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0)
		sbassertlocked(&so->so_rcv);
	soassertlocked(so);

	task_del(sosplice_taskq, &so->so_splicetask);
	timeout_del(&so->so_idleto);

	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);
	so->so_rcv.sb_flags &= ~SB_SPLICE;
	sosp->so_snd.sb_flags &= ~SB_SPLICE;
	so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	/* Do not wakeup a socket that is about to be freed. */
	if ((freeing & SOSP_FREEING_READ) == 0 && soreadable(so))
		sorwakeup(so);
	if ((freeing & SOSP_FREEING_WRITE) == 0 && sowriteable(sosp))
		sowwakeup(sosp);
}

void
soidle(void *arg)
{
	struct socket *so = arg;

	sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
	solock(so);
	/*
	 * Depending on socket type, sblock(&so->so_rcv) or solock()
	 * is always held while modifying SB_SPLICE and
	 * so->so_sp->ssp_socket.
	 */
	if (so->so_rcv.sb_flags & SB_SPLICE) {
		so->so_error = ETIMEDOUT;
		sounsplice(so, so->so_sp->ssp_socket, 0);
	}
	sounlock(so);
	sbunlock(&so->so_rcv);
}

void
sotask(void *arg)
{
	struct socket *so = arg;
	int doyield = 0;
	int sockstream = (so->so_proto->pr_flags & PR_WANTRCVD);

	/*
	 * sblock() on `so_rcv' protects sockets from being unspliced
	 * in the UDP case.  TCP sockets still rely on solock().
	 */

	sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
	if (sockstream)
		solock(so);

	if (so->so_rcv.sb_flags & SB_SPLICE) {
		if (sockstream)
			doyield = 1;
		somove(so, M_DONTWAIT);
	}

	if (sockstream)
		sounlock(so);
	sbunlock(&so->so_rcv);

	if (doyield) {
		/* Avoid userland starvation. */
		yield();
	}
}

/*
 * The socket splicing task or idle timeout may sleep while grabbing the net
 * lock.  As sofree() can be called anytime, sotask() or soidle() could access
 * the socket memory of a freed socket after wakeup.  So delay the pool_put()
 * until all pending socket splicing tasks or timeouts have finished.  Do this
 * by scheduling it on the same threads.
 */
void
soreaper(void *arg)
{
	struct socket *so = arg;

	/* Reuse splice task, sounsplice() has been called before. */
	task_set(&so->so_sp->ssp_task, soput, so);
	task_add(sosplice_taskq, &so->so_sp->ssp_task);
}

void
soput(void *arg)
{
	struct socket *so = arg;

	pool_put(&sosplice_pool, so->so_sp);
	pool_put(&socket_pool, so);
}

/*
 * Move data from receive buffer of spliced source socket to send
 * buffer of drain socket.  Try to move as much as possible in one
 * big chunk.  It is a TCP only implementation.
 * Return value 0 means splicing has been finished, 1 continue.
 */
int
somove(struct socket *so, int wait)
{
	struct socket *sosp = so->so_sp->ssp_socket;
	struct mbuf *m, **mp, *nextrecord;
	u_long len, off, oobmark;
	long space;
	int error = 0, maxreached = 0, unsplice = 0;
	unsigned int rcvstate;
	int sockdgram = ((so->so_proto->pr_flags & PR_WANTRCVD) == 0);

	if (sockdgram)
		sbassertlocked(&so->so_rcv);
	else
		soassertlocked(so);

	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);

nextpkt:
	if ((error = READ_ONCE(so->so_error)))
		goto release;
	if (sosp->so_snd.sb_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto release;
	}

	error = READ_ONCE(sosp->so_error);
	if (error) {
		if (error != ETIMEDOUT && error != EFBIG && error != ELOOP)
			goto release;
		error = 0;
	}
	if ((sosp->so_state & SS_ISCONNECTED) == 0)
		goto release;

	/* Calculate how many bytes can be copied now. */
	len = so->so_rcv.sb_datacc;
	if (so->so_splicemax) {
		KASSERT(so->so_splicelen < so->so_splicemax);
		if (so->so_splicemax <= so->so_splicelen + len) {
			len = so->so_splicemax - so->so_splicelen;
			maxreached = 1;
		}
	}
	space = sbspace_locked(sosp, &sosp->so_snd);
	if (so->so_oobmark && so->so_oobmark < len &&
	    so->so_oobmark < space + 1024)
		space += 1024;
	if (space <= 0) {
		maxreached = 0;
		goto release;
	}
	if (space < len) {
		maxreached = 0;
		if (space < sosp->so_snd.sb_lowat)
			goto release;
		len = space;
	}
	sosp->so_snd.sb_state |= SS_ISSENDING;

	SBLASTRECORDCHK(&so->so_rcv, "somove 1");
	SBLASTMBUFCHK(&so->so_rcv, "somove 1");
	m = so->so_rcv.sb_mb;
	if (m == NULL)
		goto release;
	nextrecord = m->m_nextpkt;

	/* Drop address and control information not used with splicing. */
	if (so->so_proto->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("somove soname: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
#endif
		m = m->m_next;
	}
	while (m && m->m_type == MT_CONTROL)
		m = m->m_next;
	if (m == NULL) {
		sbdroprecord(so, &so->so_rcv);
		if (so->so_proto->pr_flags & PR_WANTRCVD) {
			mtx_leave(&sosp->so_snd.sb_mtx);
			mtx_leave(&so->so_rcv.sb_mtx);
			pru_rcvd(so);
			mtx_enter(&so->so_rcv.sb_mtx);
			mtx_enter(&sosp->so_snd.sb_mtx);
		}
		goto nextpkt;
	}

	/*
	 * By splicing sockets connected to localhost, userland might create a
	 * loop.  Dissolve splicing with error if a loop is detected by the
	 * counter.
	 *
	 * If we deal with a looped broadcast/multicast packet we bail out
	 * with no error to suppress splice termination.
	 */
	if ((m->m_flags & M_PKTHDR) &&
	    ((m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) ||
	    ((m->m_flags & M_LOOP) && (m->m_flags & (M_BCAST|M_MCAST))))) {
		error = ELOOP;
		goto release;
	}

	if (so->so_proto->pr_flags & PR_ATOMIC) {
		if ((m->m_flags & M_PKTHDR) == 0)
			panic("somove !PKTHDR: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
		if (sosp->so_snd.sb_hiwat < m->m_pkthdr.len) {
			error = EMSGSIZE;
			goto release;
		}
		if (len < m->m_pkthdr.len)
			goto release;
		if (m->m_pkthdr.len < len) {
			maxreached = 0;
			len = m->m_pkthdr.len;
		}
		/*
		 * Throw away the name mbuf after it has been assured
		 * that the whole first record can be processed.
		 */
		m = so->so_rcv.sb_mb;
		sbfree(so, &so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		sbsync(&so->so_rcv, nextrecord);
	}
	/*
	 * Throw away the control mbufs after it has been assured
	 * that the whole first record can be processed.
	 */
	m = so->so_rcv.sb_mb;
	while (m && m->m_type == MT_CONTROL) {
		sbfree(so, &so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		m = so->so_rcv.sb_mb;
		sbsync(&so->so_rcv, nextrecord);
	}

	SBLASTRECORDCHK(&so->so_rcv, "somove 2");
	SBLASTMBUFCHK(&so->so_rcv, "somove 2");

	/* Take at most len mbufs out of receive buffer. */
	for (off = 0, mp = &m; off <= len && *mp;
	    off += (*mp)->m_len, mp = &(*mp)->m_next) {
		u_long size = len - off;

#ifdef DIAGNOSTIC
		if ((*mp)->m_type != MT_DATA && (*mp)->m_type != MT_HEADER)
			panic("somove type: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, *mp, (*mp)->m_type);
#endif
		if ((*mp)->m_len > size) {
			/*
			 * Move only a partial mbuf at maximum splice length or
			 * if the drain buffer is too small for this large mbuf.
			 */
			if (!maxreached && sosp->so_snd.sb_datacc > 0) {
				len -= size;
				break;
			}
			*mp = m_copym(so->so_rcv.sb_mb, 0, size, wait);
			if (*mp == NULL) {
				len -= size;
				break;
			}
			so->so_rcv.sb_mb->m_data += size;
			so->so_rcv.sb_mb->m_len -= size;
			so->so_rcv.sb_cc -= size;
			so->so_rcv.sb_datacc -= size;
		} else {
			*mp = so->so_rcv.sb_mb;
			sbfree(so, &so->so_rcv, *mp);
			so->so_rcv.sb_mb = (*mp)->m_next;
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	*mp = NULL;

	SBLASTRECORDCHK(&so->so_rcv, "somove 3");
	SBLASTMBUFCHK(&so->so_rcv, "somove 3");
	SBCHECK(so, &so->so_rcv);
	if (m == NULL)
		goto release;
	m->m_nextpkt = NULL;
	if (m->m_flags & M_PKTHDR) {
		m_resethdr(m);
		m->m_pkthdr.len = len;
	}

	/* Send window update to source peer as receive buffer has changed. */
	if (so->so_proto->pr_flags & PR_WANTRCVD) {
		mtx_leave(&sosp->so_snd.sb_mtx);
		mtx_leave(&so->so_rcv.sb_mtx);
		pru_rcvd(so);
		mtx_enter(&so->so_rcv.sb_mtx);
		mtx_enter(&sosp->so_snd.sb_mtx);
	}

	/* The receive buffer shrank by len bytes; adjust the oob mark. */
	rcvstate = so->so_rcv.sb_state;
	so->so_rcv.sb_state &= ~SS_RCVATMARK;
	oobmark = so->so_oobmark;
	so->so_oobmark = oobmark > len ? oobmark - len : 0;
	if (oobmark) {
		if (oobmark == len)
			so->so_rcv.sb_state |= SS_RCVATMARK;
		if (oobmark >= len)
			oobmark = 0;
	}

	/*
	 * Handle oob data.  If any malloc fails, ignore error.
	 * TCP urgent data is not very reliable anyway.
	 */
	while (((rcvstate & SS_RCVATMARK) || oobmark) &&
	    (so->so_options & SO_OOBINLINE)) {
		struct mbuf *o = NULL;

		if (rcvstate & SS_RCVATMARK) {
			o = m_get(wait, MT_DATA);
			rcvstate &= ~SS_RCVATMARK;
		} else if (oobmark) {
			o = m_split(m, oobmark, wait);
			if (o) {
				mtx_leave(&sosp->so_snd.sb_mtx);
				mtx_leave(&so->so_rcv.sb_mtx);
				error = pru_send(sosp, m, NULL, NULL);
				mtx_enter(&so->so_rcv.sb_mtx);
				mtx_enter(&sosp->so_snd.sb_mtx);

				if (error) {
					if (sosp->so_snd.sb_state &
					    SS_CANTSENDMORE)
						error = EPIPE;
					m_freem(o);
					goto release;
				}
				len -= oobmark;
				so->so_splicelen += oobmark;
				m = o;
				o = m_get(wait, MT_DATA);
			}
			oobmark = 0;
		}
		if (o) {
			o->m_len = 1;
			*mtod(o, caddr_t) = *mtod(m, caddr_t);

			mtx_leave(&sosp->so_snd.sb_mtx);
			mtx_leave(&so->so_rcv.sb_mtx);
			error = pru_sendoob(sosp, o, NULL, NULL);
			mtx_enter(&so->so_rcv.sb_mtx);
			mtx_enter(&sosp->so_snd.sb_mtx);

			if (error) {
				if (sosp->so_snd.sb_state & SS_CANTSENDMORE)
					error = EPIPE;
				m_freem(m);
				goto release;
			}
			len -= 1;
			so->so_splicelen += 1;
			if (oobmark) {
				oobmark -= 1;
				if (oobmark == 0)
					rcvstate |= SS_RCVATMARK;
			}
			m_adj(m, 1);
		}
	}

	/* Append all remaining data to drain socket. */
	if (so->so_rcv.sb_cc == 0 || maxreached)
		sosp->so_snd.sb_state &= ~SS_ISSENDING;

	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	if (sockdgram)
		solock_shared(sosp);
	error = pru_send(sosp, m, NULL, NULL);
	if (sockdgram)
		sounlock_shared(sosp);

	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);

	if (error) {
		if (sosp->so_snd.sb_state & SS_CANTSENDMORE ||
		    sosp->so_pcb == NULL)
			error = EPIPE;
		goto release;
	}
	so->so_splicelen += len;

	/* Move several packets if possible. */
	if (!maxreached && nextrecord)
		goto nextpkt;

 release:
	sosp->so_snd.sb_state &= ~SS_ISSENDING;

	if (!error && maxreached && so->so_splicemax == so->so_splicelen)
		error = EFBIG;
	if (error)
		WRITE_ONCE(so->so_error, error);

	if (((so->so_rcv.sb_state & SS_CANTRCVMORE) &&
	    so->so_rcv.sb_cc == 0) ||
	    (sosp->so_snd.sb_state & SS_CANTSENDMORE) ||
	    maxreached || error)
		unsplice = 1;

	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	if (unsplice) {
		if (sockdgram)
			solock(so);
		sounsplice(so, sosp, 0);
		if (sockdgram)
			sounlock(so);

		return (0);
	}
	if (timerisset(&so->so_idletv))
		timeout_add_tv(&so->so_idleto, &so->so_idletv);
	return (1);
}
#endif /* SOCKET_SPLICE */

void
sorwakeup(struct socket *so)
{
	if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

#ifdef SOCKET_SPLICE
	if (so->so_proto->pr_flags & PR_SPLICE) {
		sb_mtx_lock(&so->so_rcv);
		if (so->so_rcv.sb_flags & SB_SPLICE)
			task_add(sosplice_taskq, &so->so_splicetask);
		if (isspliced(so)) {
			sb_mtx_unlock(&so->so_rcv);
			return;
		}
		sb_mtx_unlock(&so->so_rcv);
	}
#endif
	sowakeup(so, &so->so_rcv);
	if (so->so_upcall)
		(*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
}

void
sowwakeup(struct socket *so)
{
	if ((so->so_snd.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

#ifdef SOCKET_SPLICE
	if (so->so_proto->pr_flags & PR_SPLICE) {
		sb_mtx_lock(&so->so_snd);
		if (so->so_snd.sb_flags & SB_SPLICE)
			task_add(sosplice_taskq,
			    &so->so_sp->ssp_soback->so_splicetask);
		if (issplicedback(so)) {
			sb_mtx_unlock(&so->so_snd);
			return;
		}
		sb_mtx_unlock(&so->so_snd);
	}
#endif
	sowakeup(so, &so->so_snd);
}

int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
	int error = 0;

	if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			solock(so);
			error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
			    level, optname, m);
			sounlock(so);
			return (error);
		}
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX)
				return (EINVAL);

			solock(so);
			so->so_linger = mtod(m, struct linger *)->l_linger;
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			sounlock(so);

			break;
		case SO_BINDANY:
			if ((error = suser(curproc)) != 0)	/* XXX */
				return (error);
			/* FALLTHROUGH */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_ZEROIZE:
			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);

			solock(so);
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			sounlock(so);

			break;
		case SO_DONTROUTE:
			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);
			if (*mtod(m, int *))
				error = EOPNOTSUPP;
			break;

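		/*
		 * Illustrative userland sketch (not part of this file):
		 * the cases below back setsockopt(2) calls such as this;
		 * buffer sizes are vetted by sbcheckreserve()/sbreserve()
		 * and the low-water marks are clamped to sb_hiwat.
		 */
#if 0
		int sz = 64 * 1024;
		setsockopt(s, SOL_SOCKET, SO_RCVBUF, &sz, sizeof(sz));
#endif
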

int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
	int error = 0;

	if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			solock(so);
			error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
			    level, optname, m);
			sounlock(so);
			return (error);
		}
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX)
				return (EINVAL);

			solock(so);
			so->so_linger = mtod(m, struct linger *)->l_linger;
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			sounlock(so);

			break;
		case SO_BINDANY:
			if ((error = suser(curproc)) != 0)	/* XXX */
				return (error);
			/* FALLTHROUGH */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_ZEROIZE:
			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);

			solock(so);
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			sounlock(so);

			break;
		case SO_DONTROUTE:
			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);
			if (*mtod(m, int *))
				error = EOPNOTSUPP;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			struct sockbuf *sb = (optname == SO_SNDBUF ||
			    optname == SO_SNDLOWAT ?
			    &so->so_snd : &so->so_rcv);
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);
			cnt = *mtod(m, int *);
			if ((long)cnt <= 0)
				cnt = 1;

			if ((sb->sb_flags & SB_MTXLOCK) == 0)
				solock(so);
			mtx_enter(&sb->sb_mtx);

			switch (optname) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sb->sb_state &
				    (SS_CANTSENDMORE | SS_CANTRCVMORE)) {
					error = EINVAL;
					break;
				}
				if (sbcheckreserve(cnt, sb->sb_wat) ||
				    sbreserve(so, sb, cnt)) {
					error = ENOBUFS;
					break;
				}
				sb->sb_wat = cnt;
				break;
			case SO_SNDLOWAT:
			case SO_RCVLOWAT:
				sb->sb_lowat = (cnt > sb->sb_hiwat) ?
				    sb->sb_hiwat : cnt;
				break;
			}

			mtx_leave(&sb->sb_mtx);
			if ((sb->sb_flags & SB_MTXLOCK) == 0)
				sounlock(so);

			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct sockbuf *sb = (optname == SO_SNDTIMEO ?
			    &so->so_snd : &so->so_rcv);
			struct timeval tv;
			uint64_t nsecs;

			if (m == NULL || m->m_len < sizeof (tv))
				return (EINVAL);
			memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
			if (!timerisvalid(&tv))
				return (EINVAL);
			nsecs = TIMEVAL_TO_NSEC(&tv);
			if (nsecs == UINT64_MAX)
				return (EDOM);
			if (nsecs == 0)
				nsecs = INFSLP;

			mtx_enter(&sb->sb_mtx);
			sb->sb_timeo_nsecs = nsecs;
			mtx_leave(&sb->sb_mtx);
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				const struct domain *dom =
				    so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				solock(so);
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_SETOPT, so, level, optname, m);
				sounlock(so);
			} else
				error = ENOPROTOOPT;
			break;
#ifdef SOCKET_SPLICE
		case SO_SPLICE:
			if (m == NULL) {
				error = sosplice(so, -1, 0, NULL);
			} else if (m->m_len < sizeof(int)) {
				error = EINVAL;
			} else if (m->m_len < sizeof(struct splice)) {
				error = sosplice(so, *mtod(m, int *), 0, NULL);
			} else {
				error = sosplice(so,
				    mtod(m, struct splice *)->sp_fd,
				    mtod(m, struct splice *)->sp_max,
				    &mtod(m, struct splice *)->sp_idle);
			}
			break;
#endif /* SOCKET_SPLICE */

		default:
			error = ENOPROTOOPT;
			break;
		}
	}

	return (error);
}
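
/*
 * Read back a socket option.  As with sosetopt(), only SOL_SOCKET
 * level options are handled here.  A common userland use is fetching
 * the deferred error after a non-blocking connect(2); a hedged sketch
 * (the descriptor `s' is assumed):
 *
 *	int error;
 *	socklen_t len = sizeof(error);
 *	if (getsockopt(s, SOL_SOCKET, SO_ERROR, &error, &len) == -1)
 *		err(1, "getsockopt");
 */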

int
sogetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
	int error = 0;

	if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			m->m_len = 0;

			solock(so);
			error = (*so->so_proto->pr_ctloutput)(PRCO_GETOPT, so,
			    level, optname, m);
			sounlock(so);
			return (error);
		} else
			return (ENOPROTOOPT);
	} else {
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			solock_shared(so);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			sounlock_shared(so);
			break;

		case SO_BINDANY:
		case SO_USELOOPBACK:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_ACCEPTCONN:
		case SO_TIMESTAMP:
		case SO_ZEROIZE:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_DONTROUTE:
			*mtod(m, int *) = 0;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			solock(so);
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			sounlock(so);

			break;

		case SO_DOMAIN:
			*mtod(m, int *) = so->so_proto->pr_domain->dom_family;
			break;

		case SO_PROTOCOL:
			*mtod(m, int *) = so->so_proto->pr_protocol;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct sockbuf *sb = (optname == SO_SNDTIMEO ?
			    &so->so_snd : &so->so_rcv);
			struct timeval tv;
			uint64_t nsecs;

			mtx_enter(&sb->sb_mtx);
			nsecs = sb->sb_timeo_nsecs;
			mtx_leave(&sb->sb_mtx);

			m->m_len = sizeof(struct timeval);
			memset(&tv, 0, sizeof(tv));
			if (nsecs != INFSLP)
				NSEC_TO_TIMEVAL(nsecs, &tv);
			memcpy(mtod(m, struct timeval *), &tv, sizeof tv);
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				const struct domain *dom =
				    so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				solock(so);
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_GETOPT, so, level, optname, m);
				sounlock(so);
				if (error)
					return (error);
				break;
			}
			return (ENOPROTOOPT);

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
		    {
			off_t len;

			m->m_len = sizeof(off_t);
			solock_shared(so);
			len = so->so_sp ? so->so_sp->ssp_len : 0;
			sounlock_shared(so);
			memcpy(mtod(m, off_t *), &len, sizeof(off_t));
			break;
		    }
#endif /* SOCKET_SPLICE */

		case SO_PEERCRED:
			if (so->so_proto->pr_protocol == AF_UNIX) {
				struct unpcb *unp = sotounpcb(so);

				solock(so);
				if (unp->unp_flags & UNP_FEIDS) {
					m->m_len = sizeof(unp->unp_connid);
					memcpy(mtod(m, caddr_t),
					    &(unp->unp_connid), m->m_len);
					sounlock(so);
					break;
				}
				sounlock(so);

				return (ENOTCONN);
			}
			return (EOPNOTSUPP);

		default:
			return (ENOPROTOOPT);
		}
		return (0);
	}
}

void
sohasoutofband(struct socket *so)
{
	pgsigio(&so->so_sigio, SIGURG, 0);
	knote(&so->so_rcv.sb_klist, 0);
}
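
/*
 * Take the locks an event filter needs before it may inspect socket
 * state: the shared net lock for inet sockets, the per-socket rwlock
 * for every other domain, and the sockbuf mutex on top of either.
 * sofilt_unlock() releases them in the opposite order.
 */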

void
sofilt_lock(struct socket *so, struct sockbuf *sb)
{
	switch (so->so_proto->pr_domain->dom_family) {
	case PF_INET:
	case PF_INET6:
		NET_LOCK_SHARED();
		break;
	default:
		rw_enter_write(&so->so_lock);
		break;
	}

	mtx_enter(&sb->sb_mtx);
}

void
sofilt_unlock(struct socket *so, struct sockbuf *sb)
{
	mtx_leave(&sb->sb_mtx);

	switch (so->so_proto->pr_domain->dom_family) {
	case PF_INET:
	case PF_INET6:
		NET_UNLOCK_SHARED();
		break;
	default:
		rw_exit_write(&so->so_lock);
		break;
	}
}

int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	struct sockbuf *sb;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	case EVFILT_EXCEPT:
		kn->kn_fop = &soexcept_filtops;
		sb = &so->so_rcv;
		break;
	default:
		return (EINVAL);
	}

	klist_insert(&sb->sb_klist, kn);

	return (0);
}
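
/*
 * The filters below back kevent(2) on sockets.  A hedged userland
 * sketch of registering for readability (the descriptor `s' and the
 * kqueue `kq' are assumed to exist):
 *
 *	struct kevent ev;
 *	EV_SET(&ev, s, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent");
 */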

void
filt_sordetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	klist_remove(&so->so_rcv.sb_klist, kn);
}

int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;
	u_int state = READ_ONCE(so->so_state);
	u_int error = READ_ONCE(so->so_error);
	int rv = 0;

	MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
	if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

	if (so->so_options & SO_ACCEPTCONN) {
		short qlen = READ_ONCE(so->so_qlen);

		if (so->so_rcv.sb_flags & SB_MTXLOCK)
			soassertlocked_readonly(so);

		kn->kn_data = qlen;
		rv = (kn->kn_data != 0);

		if (kn->kn_flags & (__EV_POLL | __EV_SELECT)) {
			if (state & SS_ISDISCONNECTED) {
				kn->kn_flags |= __EV_HUP;
				rv = 1;
			} else {
				rv = qlen || soreadable(so);
			}
		}

		return rv;
	}

	kn->kn_data = so->so_rcv.sb_cc;
#ifdef SOCKET_SPLICE
	if (isspliced(so)) {
		rv = 0;
	} else
#endif /* SOCKET_SPLICE */
	if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL) {
			if (state & SS_ISDISCONNECTED)
				kn->kn_flags |= __EV_HUP;
		}
		kn->kn_fflags = error;
		rv = 1;
	} else if (error) {
		rv = 1;
	} else if (kn->kn_sfflags & NOTE_LOWAT) {
		rv = (kn->kn_data >= kn->kn_sdata);
	} else {
		rv = (kn->kn_data >= so->so_rcv.sb_lowat);
	}

	return rv;
}

void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	klist_remove(&so->so_snd.sb_klist, kn);
}

int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;
	u_int state = READ_ONCE(so->so_state);
	u_int error = READ_ONCE(so->so_error);
	int rv;

	MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx);
	if ((so->so_snd.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

	kn->kn_data = sbspace_locked(so, &so->so_snd);
	if (so->so_snd.sb_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL) {
			if (state & SS_ISDISCONNECTED)
				kn->kn_flags |= __EV_HUP;
		}
		kn->kn_fflags = error;
		rv = 1;
	} else if (error) {
		rv = 1;
	} else if (((state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
		rv = 0;
	} else if (kn->kn_sfflags & NOTE_LOWAT) {
		rv = (kn->kn_data >= kn->kn_sdata);
	} else {
		rv = (kn->kn_data >= so->so_snd.sb_lowat);
	}

	return (rv);
}

int
filt_soexcept(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv = 0;

	MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
	if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

#ifdef SOCKET_SPLICE
	if (isspliced(so)) {
		rv = 0;
	} else
#endif /* SOCKET_SPLICE */
	if (kn->kn_sfflags & NOTE_OOB) {
		if (so->so_oobmark || (so->so_rcv.sb_state & SS_RCVATMARK)) {
			kn->kn_fflags |= NOTE_OOB;
			kn->kn_data -= so->so_oobmark;
			rv = 1;
		}
	}

	if (kn->kn_flags & __EV_POLL) {
		u_int state = READ_ONCE(so->so_state);

		if (state & SS_ISDISCONNECTED) {
			kn->kn_flags |= __EV_HUP;
			rv = 1;
		}
	}

	return rv;
}

int
filt_sowmodify(struct kevent *kev, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	sofilt_lock(so, &so->so_snd);
	rv = knote_modify(kev, kn);
	sofilt_unlock(so, &so->so_snd);

	return (rv);
}

int
filt_sowprocess(struct knote *kn, struct kevent *kev)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	sofilt_lock(so, &so->so_snd);
	rv = knote_process(kn, kev);
	sofilt_unlock(so, &so->so_snd);

	return (rv);
}

int
filt_sormodify(struct kevent *kev, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	sofilt_lock(so, &so->so_rcv);
	rv = knote_modify(kev, kn);
	sofilt_unlock(so, &so->so_rcv);

	return (rv);
}

int
filt_sorprocess(struct knote *kn, struct kevent *kev)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	sofilt_lock(so, &so->so_rcv);
	rv = knote_process(kn, kev);
	sofilt_unlock(so, &so->so_rcv);

	return (rv);
}
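
/*
 * ddb(4) helpers: dump a socket and its buffers from the kernel
 * debugger (e.g. via the "show socket" command).
 */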

#ifdef DDB
void	sobuf_print(struct sockbuf *,
	    int (*)(const char *, ...)
	    __attribute__((__format__(__kprintf__,1,2))));

void
sobuf_print(struct sockbuf *sb,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	(*pr)("\tsb_cc: %lu\n", sb->sb_cc);
	(*pr)("\tsb_datacc: %lu\n", sb->sb_datacc);
	(*pr)("\tsb_hiwat: %lu\n", sb->sb_hiwat);
	(*pr)("\tsb_wat: %lu\n", sb->sb_wat);
	(*pr)("\tsb_mbcnt: %lu\n", sb->sb_mbcnt);
	(*pr)("\tsb_mbmax: %lu\n", sb->sb_mbmax);
	(*pr)("\tsb_lowat: %ld\n", sb->sb_lowat);
	(*pr)("\tsb_mb: %p\n", sb->sb_mb);
	(*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
	(*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
	(*pr)("\tsb_flags: %04x\n", sb->sb_flags);
	(*pr)("\tsb_state: %04x\n", sb->sb_state);
	(*pr)("\tsb_timeo_nsecs: %llu\n", sb->sb_timeo_nsecs);
}

void
so_print(void *v,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct socket *so = v;

	(*pr)("socket %p\n", so);
	(*pr)("so_type: %i\n", so->so_type);
	(*pr)("so_options: 0x%04x\n", so->so_options); /* %b */
	(*pr)("so_linger: %i\n", so->so_linger);
	(*pr)("so_state: 0x%04x\n", so->so_state);
	(*pr)("so_pcb: %p\n", so->so_pcb);
	(*pr)("so_proto: %p\n", so->so_proto);
	(*pr)("so_sigio: %p\n", so->so_sigio.sir_sigio);

	(*pr)("so_head: %p\n", so->so_head);
	(*pr)("so_onq: %p\n", so->so_onq);
	(*pr)("so_q0: @%p first: %p\n", &so->so_q0, TAILQ_FIRST(&so->so_q0));
	(*pr)("so_q: @%p first: %p\n", &so->so_q, TAILQ_FIRST(&so->so_q));
	(*pr)("so_eq: next: %p\n", TAILQ_NEXT(so, so_qe));
	(*pr)("so_q0len: %i\n", so->so_q0len);
	(*pr)("so_qlen: %i\n", so->so_qlen);
	(*pr)("so_qlimit: %i\n", so->so_qlimit);
	(*pr)("so_timeo: %i\n", so->so_timeo);
	(*pr)("so_oobmark: %lu\n", so->so_oobmark);

	(*pr)("so_sp: %p\n", so->so_sp);
	if (so->so_sp != NULL) {
		(*pr)("\tssp_socket: %p\n", so->so_sp->ssp_socket);
		(*pr)("\tssp_soback: %p\n", so->so_sp->ssp_soback);
		(*pr)("\tssp_len: %lld\n",
		    (long long)so->so_sp->ssp_len);
		(*pr)("\tssp_max: %lld\n",
		    (long long)so->so_sp->ssp_max);
		(*pr)("\tssp_idletv: %lld %ld\n", so->so_sp->ssp_idletv.tv_sec,
		    so->so_sp->ssp_idletv.tv_usec);
		(*pr)("\tssp_idleto: %spending (@%i)\n",
		    timeout_pending(&so->so_sp->ssp_idleto) ? "" : "not ",
		    so->so_sp->ssp_idleto.to_time);
	}

	(*pr)("so_rcv:\n");
	sobuf_print(&so->so_rcv, pr);
	(*pr)("so_snd:\n");
	sobuf_print(&so->so_snd, pr);

	(*pr)("so_upcall: %p so_upcallarg: %p\n",
	    so->so_upcall, so->so_upcallarg);

	(*pr)("so_euid: %d so_ruid: %d\n", so->so_euid, so->so_ruid);
	(*pr)("so_egid: %d so_rgid: %d\n", so->so_egid, so->so_rgid);
	(*pr)("so_cpid: %d\n", so->so_cpid);
}
#endif