/*	$OpenBSD: uipc_socket.c,v 1.350 2024/12/27 13:08:11 mvs Exp $	*/
/*	$NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/unpcb.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/pool.h>
#include <sys/atomic.h>
#include <sys/rwlock.h>
#include <sys/time.h>
#include <sys/refcnt.h>

#ifdef DDB
#include <machine/db_machdep.h>
#endif

void	sbsync(struct sockbuf *, struct mbuf *);

int	sosplice(struct socket *, int, off_t, struct timeval *);
void	sounsplice(struct socket *, struct socket *, int);
void	soidle(void *);
void	sotask(void *);
void	soreaper(void *);
void	soput(void *);
int	somove(struct socket *, int);
void	sorflush(struct socket *);

void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_soexcept(struct knote *kn, long hint);

int	filt_sowmodify(struct kevent *kev, struct knote *kn);
int	filt_sowprocess(struct knote *kn, struct kevent *kev);

int	filt_sormodify(struct kevent *kev, struct knote *kn);
int	filt_sorprocess(struct knote *kn, struct kevent *kev);

const struct filterops soread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_sordetach,
	.f_event	= filt_soread,
	.f_modify	= filt_sormodify,
	.f_process	= filt_sorprocess,
};

const struct filterops sowrite_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_sowdetach,
	.f_event	= filt_sowrite,
	.f_modify	= filt_sowmodify,
	.f_process	= filt_sowprocess,
};

const struct filterops soexcept_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_sordetach,
	.f_event	= filt_soexcept,
	.f_modify	= filt_sormodify,
	.f_process	= filt_sorprocess,
};

#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

struct pool socket_pool;
#ifdef SOCKET_SPLICE
struct pool sosplice_pool;
struct taskq *sosplice_taskq;
struct rwlock sosplice_lock = RWLOCK_INITIALIZER("sosplicelk");
#endif

void
soinit(void)
{
	pool_init(&socket_pool, sizeof(struct socket), 0, IPL_SOFTNET, 0,
	    "sockpl", NULL);
#ifdef SOCKET_SPLICE
	pool_init(&sosplice_pool, sizeof(struct sosplice), 0, IPL_SOFTNET, 0,
	    "sosppl", NULL);
#endif
}

struct socket *
soalloc(const struct protosw *prp, int wait)
{
	const struct domain *dp = prp->pr_domain;
	struct socket *so;

	so = pool_get(&socket_pool, (wait == M_WAIT ?
	    PR_WAITOK : PR_NOWAIT) | PR_ZERO);
	if (so == NULL)
		return (NULL);
	rw_init_flags(&so->so_lock, dp->dom_name, RWL_DUPOK);
	refcnt_init(&so->so_refcnt);
	rw_init(&so->so_rcv.sb_lock, "sbufrcv");
	rw_init(&so->so_snd.sb_lock, "sbufsnd");
	mtx_init_flags(&so->so_rcv.sb_mtx, IPL_MPFLOOR, "sbrcv", 0);
	mtx_init_flags(&so->so_snd.sb_mtx, IPL_MPFLOOR, "sbsnd", 0);
	klist_init_mutex(&so->so_rcv.sb_klist, &so->so_rcv.sb_mtx);
	klist_init_mutex(&so->so_snd.sb_klist, &so->so_snd.sb_mtx);
	sigio_init(&so->so_sigio);
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);

	switch (dp->dom_family) {
	case AF_INET:
	case AF_INET6:
		switch (prp->pr_type) {
		case SOCK_RAW:
		case SOCK_DGRAM:
			so->so_snd.sb_flags |= SB_MTXLOCK;
			/* FALLTHROUGH */
		case SOCK_STREAM:
			so->so_rcv.sb_flags |= SB_MTXLOCK;
			break;
		}
		break;
	case AF_KEY:
	case AF_ROUTE:
	case AF_UNIX:
	case AF_FRAME:
		so->so_snd.sb_flags |= SB_MTXLOCK;
		so->so_rcv.sb_flags |= SB_MTXLOCK;
		break;
	}

	return (so);
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p = curproc;		/* XXX */
	const struct protosw *prp;
	struct socket *so;
	int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == NULL || prp->pr_usrreqs == NULL)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(prp, M_WAIT);
	so->so_type = type;
	if (suser(p) == 0)
		so->so_state = SS_PRIV;
	so->so_ruid = p->p_ucred->cr_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_rgid = p->p_ucred->cr_rgid;
	so->so_egid = p->p_ucred->cr_gid;
	so->so_cpid = p->p_p->ps_pid;
	so->so_proto = prp;
	so->so_snd.sb_timeo_nsecs = INFSLP;
	so->so_rcv.sb_timeo_nsecs = INFSLP;

	solock(so);
	error = pru_attach(so, proto, M_WAIT);
	if (error) {
		so->so_state |= SS_NOFDREF;
		/* sofree() calls sounlock(). */
		sofree(so, 0);
		return (error);
	}
	sounlock(so);
	*aso = so;
	return (0);
}
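/*
 * Illustrative sketch (not part of the original source): socreate() is
 * the kernel-internal backend of socket(2), and kernel subsystems may
 * call it directly.  A minimal in-kernel caller might look like:
 *
 *	struct socket *so;
 *	int error;
 *
 *	error = socreate(AF_INET, &so, SOCK_STREAM, 0);
 *	if (error)
 *		return (error);
 *	// ...then sobind()/soconnect() with an mbuf-wrapped sockaddr,
 *	// and soclose(so, 0) when done.
 */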
int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	soassertlocked(so);
	return pru_bind(so, nam, p);
}

int
solisten(struct socket *so, int backlog)
{
	int somaxconn_local = atomic_load_int(&somaxconn);
	int sominconn_local = atomic_load_int(&sominconn);
	int error;

	switch (so->so_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		break;
	default:
		return (EOPNOTSUPP);
	}

	soassertlocked(so);

	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
		return (EINVAL);
#ifdef SOCKET_SPLICE
	if (isspliced(so) || issplicedback(so))
		return (EOPNOTSUPP);
#endif /* SOCKET_SPLICE */
	error = pru_listen(so);
	if (error)
		return (error);
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn_local)
		backlog = somaxconn_local;
	if (backlog < sominconn_local)
		backlog = sominconn_local;
	so->so_qlimit = backlog;
	return (0);
}
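/*
 * Illustrative note (not part of the original source): the backlog
 * passed to listen(2) is clamped to the [sominconn, somaxconn] range
 * above.  With the defaults (sominconn 80, somaxconn 128), a userland
 * caller sees:
 *
 *	listen(s, 5);		// so_qlimit becomes 80 (sominconn)
 *	listen(s, 1000);	// so_qlimit becomes 128 (somaxconn)
 *	listen(s, -1);		// negative also clamps to somaxconn
 */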
#define SOSP_FREEING_READ	1
#define SOSP_FREEING_WRITE	2
void
sofree(struct socket *so, int keep_lock)
{
	int persocket = solock_persocket(so);

	soassertlocked(so);

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
		if (!keep_lock)
			sounlock(so);
		return;
	}
	if (so->so_head) {
		struct socket *head = so->so_head;

		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (so->so_onq == &head->so_q) {
			if (!keep_lock)
				sounlock(so);
			return;
		}

		if (persocket) {
			/*
			 * Concurrent close of `head' could
			 * abort `so' due to re-lock.
			 */
			soref(so);
			soref(head);
			sounlock(so);
			solock(head);
			solock(so);

			if (so->so_onq != &head->so_q0) {
				sounlock(head);
				sounlock(so);
				sorele(head);
				sorele(so);
				return;
			}

			sorele(head);
			sorele(so);
		}

		soqremque(so, 0);

		if (persocket)
			sounlock(head);
	}

	switch (so->so_proto->pr_domain->dom_family) {
	case AF_INET:
	case AF_INET6:
		if (so->so_proto->pr_type == SOCK_STREAM)
			break;
		/* FALLTHROUGH */
	default:
		sounlock(so);
		refcnt_finalize(&so->so_refcnt, "sofinal");
		solock(so);
		break;
	}

	sigio_free(&so->so_sigio);
	klist_free(&so->so_rcv.sb_klist);
	klist_free(&so->so_snd.sb_klist);

	mtx_enter(&so->so_snd.sb_mtx);
	sbrelease(so, &so->so_snd);
	mtx_leave(&so->so_snd.sb_mtx);

	/*
	 * Unlocked dispose and cleanup is safe.  Socket is unlinked
	 * from everywhere.  Even a concurrent sotask() thread will not
	 * call somove().
	 */
	if (so->so_proto->pr_flags & PR_RIGHTS &&
	    so->so_proto->pr_domain->dom_dispose)
		(*so->so_proto->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
	m_purge(so->so_rcv.sb_mb);

	if (!keep_lock)
		sounlock(so);

#ifdef SOCKET_SPLICE
	if (so->so_sp) {
		/* Reuse splice idle, sounsplice() has been called before. */
		timeout_set_flags(&so->so_sp->ssp_idleto, soreaper, so,
		    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
		timeout_add(&so->so_sp->ssp_idleto, 0);
	} else
#endif /* SOCKET_SPLICE */
	{
		pool_put(&socket_pool, so);
	}
}

static inline uint64_t
solinger_nsec(struct socket *so)
{
	if (so->so_linger == 0)
		return INFSLP;

	return SEC_TO_NSEC(so->so_linger);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so, int flags)
{
	struct socket *so2;
	int error = 0;

	solock(so);
	/* Revoke async IO early. There is a final revocation in sofree(). */
	sigio_free(&so->so_sigio);
	if (so->so_state & SS_ISCONNECTED) {
		if (so->so_pcb == NULL)
			goto discard;
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (flags & MSG_DONTWAIT))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = sosleep_nsec(so, &so->so_timeo,
				    PSOCK | PCATCH, "netcls",
				    solinger_nsec(so));
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2;
		error2 = pru_detach(so);
		if (error == 0)
			error = error2;
	}
	if (so->so_options & SO_ACCEPTCONN) {
		int persocket = solock_persocket(so);

		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			if (persocket)
				solock(so2);
			(void) soqremque(so2, 0);
			if (persocket)
				sounlock(so);
			soabort(so2);
			if (persocket)
				solock(so);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			if (persocket)
				solock(so2);
			(void) soqremque(so2, 1);
			if (persocket)
				sounlock(so);
			soabort(so2);
			if (persocket)
				solock(so);
		}
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state |= SS_NOFDREF;

#ifdef SOCKET_SPLICE
	if (so->so_sp) {
		struct socket *soback;

		if (so->so_proto->pr_flags & PR_WANTRCVD) {
			/*
			 * Copy and paste of the code below, but we can't
			 * relock and sleep in sofree() in the tcp(4) case.
			 * That's why tcp(4) still relies on solock() for
			 * splicing and unsplicing.
			 */

			if (issplicedback(so)) {
				int freeing = SOSP_FREEING_WRITE;

				if (so->so_sp->ssp_soback == so)
					freeing |= SOSP_FREEING_READ;
				sounsplice(so->so_sp->ssp_soback, so, freeing);
			}
			if (isspliced(so)) {
				int freeing = SOSP_FREEING_READ;

				if (so == so->so_sp->ssp_socket)
					freeing |= SOSP_FREEING_WRITE;
				sounsplice(so, so->so_sp->ssp_socket, freeing);
			}
			goto free;
		}

		sounlock(so);
		mtx_enter(&so->so_snd.sb_mtx);
		/*
		 * Concurrent sounsplice() locks `sb_mtx' mutexes on
		 * both `so_snd' and `so_rcv' before unsplicing sockets.
		 */
		if ((soback = so->so_sp->ssp_soback) == NULL) {
			mtx_leave(&so->so_snd.sb_mtx);
			goto notsplicedback;
		}
		soref(soback);
		mtx_leave(&so->so_snd.sb_mtx);

		/*
		 * `so' can only be unspliced, and never spliced again.
		 * Thus if the issplicedback(so) check is positive, the
		 * socket is still spliced and `ssp_soback' points to
		 * the same socket as `soback'.
		 */
		sblock(&soback->so_rcv, SBL_WAIT | SBL_NOINTR);
		if (issplicedback(so)) {
			int freeing = SOSP_FREEING_WRITE;

			if (so->so_sp->ssp_soback == so)
				freeing |= SOSP_FREEING_READ;
			solock(soback);
			sounsplice(so->so_sp->ssp_soback, so, freeing);
			sounlock(soback);
		}
		sbunlock(&soback->so_rcv);
		sorele(soback);

notsplicedback:
		sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
		if (isspliced(so)) {
			int freeing = SOSP_FREEING_READ;

			if (so == so->so_sp->ssp_socket)
				freeing |= SOSP_FREEING_WRITE;
			solock(so);
			sounsplice(so, so->so_sp->ssp_socket, freeing);
			sounlock(so);
		}
		sbunlock(&so->so_rcv);

		solock(so);
	}
free:
#endif /* SOCKET_SPLICE */
	/* sofree() calls sounlock(). */
	sofree(so, 0);
	return (error);
}
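/*
 * Illustrative sketch (not part of the original source): the SO_LINGER
 * handling in soclose() above is what makes a lingering close(2) block.
 * From userland, assuming a connected stream socket s:
 *
 *	struct linger l;
 *
 *	l.l_onoff = 1;		// enable lingering
 *	l.l_linger = 5;		// wait at most 5 seconds
 *	if (setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) == -1)
 *		err(1, "SO_LINGER");
 *	close(s);		// may now sleep in "netcls" for up to 5s
 */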
void
soabort(struct socket *so)
{
	soassertlocked(so);
	pru_abort(so);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int error = 0;

	soassertlocked(so);

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = pru_accept(so, nam);
	else
		error = ECONNABORTED;
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	int error;

	soassertlocked(so);

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = pru_connect(so, nam);
	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int persocket, error;

	if ((persocket = solock_persocket(so1)))
		solock_pair(so1, so2);
	else
		solock(so1);

	error = pru_connect2(so1, so2);

	if (persocket)
		sounlock(so2);
	sounlock(so1);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int error;

	soassertlocked(so);

	if ((so->so_state & SS_ISCONNECTED) == 0)
		return (ENOTCONN);
	if (so->so_state & SS_ISDISCONNECTING)
		return (EALREADY);
	error = pru_disconnect(so);
	return (error);
}

int m_getuio(struct mbuf **, int, long, struct uio *);

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
	long space, clen = 0;
	size_t resid;
	int error;
	int atomic = sosendallatonce(so) || top;
	int dosolock = ((so->so_snd.sb_flags & SB_MTXLOCK) == 0);

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/* MSG_EOR on a SOCK_STREAM socket is invalid. */
	if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		m_freem(top);
		m_freem(control);
		return (EINVAL);
	}
	if (uio && uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgsnd++;
	if (control) {
		/*
		 * In theory clen should be unsigned (since control->m_len is).
		 * However, space must be signed, as it might be less than 0
		 * if we over-committed, and we must use a signed comparison
		 * of space and clen.
		 */
		clen = control->m_len;
		/* reserve extra space for AF_UNIX's internalize */
		if (so->so_proto->pr_domain->dom_family == AF_UNIX &&
		    clen >= CMSG_ALIGN(sizeof(struct cmsghdr)) &&
		    mtod(control, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
			clen = CMSG_SPACE(
			    (clen - CMSG_ALIGN(sizeof(struct cmsghdr))) *
			    (sizeof(struct fdpass) / sizeof(int)));
	}

#define	snderr(errno)	{ error = errno; goto release; }

restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	if (dosolock)
		solock_shared(so);
	sb_mtx_lock(&so->so_snd);
	so->so_snd.sb_state |= SS_ISSENDING;
	do {
		if (so->so_snd.sb_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if ((error = READ_ONCE(so->so_error))) {
			so->so_error = 0;
			snderr(error);
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if (!(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == NULL)
				snderr(EDESTADDRREQ);
		}
		space = sbspace_locked(so, &so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if (so->so_proto->pr_domain->dom_family == AF_UNIX) {
			if (atomic && resid > so->so_snd.sb_hiwat)
				snderr(EMSGSIZE);
		} else {
			if (clen > so->so_snd.sb_hiwat ||
			    (atomic && resid > so->so_snd.sb_hiwat - clen))
				snderr(EMSGSIZE);
		}
		if (space < clen ||
		    (space - clen < resid &&
		    (atomic || space < so->so_snd.sb_lowat))) {
			if (flags & MSG_DONTWAIT)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(so, &so->so_snd);
			so->so_snd.sb_state &= ~SS_ISSENDING;
			sb_mtx_unlock(&so->so_snd);
			if (dosolock)
				sounlock_shared(so);
			if (error)
				goto out;
			goto restart;
		}
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				sb_mtx_unlock(&so->so_snd);
				if (dosolock)
					sounlock_shared(so);
				error = m_getuio(&top, atomic, space, uio);
				if (dosolock)
					solock_shared(so);
				sb_mtx_lock(&so->so_snd);
				if (error)
					goto release;
				space -= top->m_pkthdr.len;
				resid = uio->uio_resid;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			}
			if (resid == 0)
				so->so_snd.sb_state &= ~SS_ISSENDING;
			if (top && so->so_options & SO_ZEROIZE)
				top->m_flags |= M_ZEROIZE;
			sb_mtx_unlock(&so->so_snd);
			if (!dosolock)
				solock_shared(so);
			if (flags & MSG_OOB)
				error = pru_sendoob(so, top, addr, control);
			else
				error = pru_send(so, top, addr, control);
			if (!dosolock)
				sounlock_shared(so);
			sb_mtx_lock(&so->so_snd);
			clen = 0;
			control = NULL;
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	so->so_snd.sb_state &= ~SS_ISSENDING;
	sb_mtx_unlock(&so->so_snd);
	if (dosolock)
		sounlock_shared(so);
	sbunlock(&so->so_snd);
out:
	m_freem(top);
	m_freem(control);
	return (error);
}
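/*
 * Illustrative sketch (not part of the original source): the atomic
 * ("all at once") path in sosend() above is what userland sees on
 * datagram sockets.  Assuming a UDP socket s with the default 9216
 * byte send buffer, an oversized datagram fails immediately:
 *
 *	char big[64 * 1024];
 *
 *	if (send(s, big, sizeof(big), 0) == -1)
 *		warn("send");	// EMSGSIZE: resid > sb_hiwat
 *
 * while MSG_EOR on a SOCK_STREAM socket is rejected with EINVAL.
 */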
int
m_getuio(struct mbuf **mp, int atomic, long space, struct uio *uio)
{
	struct mbuf *m, *top = NULL;
	struct mbuf **nextp = &top;
	u_long len, mlen;
	size_t resid = uio->uio_resid;
	int error;

	do {
		if (top == NULL) {
			MGETHDR(m, M_WAIT, MT_DATA);
			mlen = MHLEN;
		} else {
			MGET(m, M_WAIT, MT_DATA);
			mlen = MLEN;
		}
		/* chain mbuf together */
		*nextp = m;
		nextp = &m->m_next;

		resid = ulmin(resid, space);
		if (resid >= MINCLSIZE) {
			MCLGETL(m, M_NOWAIT, ulmin(resid, MAXMCLBYTES));
			if ((m->m_flags & M_EXT) == 0)
				MCLGETL(m, M_NOWAIT, MCLBYTES);
			if ((m->m_flags & M_EXT) == 0)
				goto nopages;
			mlen = m->m_ext.ext_size;
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && m == top && len < mlen - max_hdr)
				m->m_data += max_hdr;
		} else {
nopages:
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && m == top && len < mlen - max_hdr)
				m_align(m, len);
		}

		error = uiomove(mtod(m, caddr_t), len, uio);
		if (error) {
			m_freem(top);
			return (error);
		}

		/* adjust counters */
		resid = uio->uio_resid;
		space -= len;
		m->m_len = len;
		top->m_pkthdr.len += len;

		/* Is there more space and more data? */
	} while (space > 0 && resid > 0);

	*mp = top;
	return 0;
}
/*
 * Following replacement or removal of the first mbuf on the first
 * mbuf chain of a socket buffer, push necessary state changes back
 * into the socket buffer so that other consumers see the values
 * consistently.  'nextrecord' is the caller's locally stored value of
 * the original value of sb->sb_mb->m_nextpkt which must be restored
 * when the lead mbuf changes.  NOTE: 'nextrecord' may be NULL.
 */
void
sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
{

	/*
	 * First, update for the new value of nextrecord.  If necessary,
	 * make it the first record.
	 */
	if (sb->sb_mb != NULL)
		sb->sb_mb->m_nextpkt = nextrecord;
	else
		sb->sb_mb = nextrecord;

	/*
	 * Now update any dependent socket buffer fields to reflect
	 * the new state.  This is an inline of SB_EMPTY_FIXUP, with
	 * the addition of a second clause that takes care of the
	 * case where sb_mb has been updated, but remains the last
	 * record.
	 */
	if (sb->sb_mb == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (sb->sb_mb->m_nextpkt == NULL)
		sb->sb_lastrecord = sb->sb_mb;
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking the network for the entire time here, we
 * release the solock() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp,
    socklen_t controllen)
{
	struct mbuf *m, **mp;
	struct mbuf *cm;
	u_long len, offset, moff;
	int flags, error, error2, type, uio_error = 0;
	const struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	size_t resid, orig_resid = uio->uio_resid;
	int dosolock = ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0);

	mp = mp0;
	if (paddr)
		*paddr = NULL;
	if (controlp)
		*controlp = NULL;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		solock_shared(so);
		error = pru_rcvoob(so, m, flags & MSG_PEEK);
		sounlock_shared(so);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    ulmin(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		m_freem(m);
		return (error);
	}
	if (mp)
		*mp = NULL;

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	if (dosolock)
		solock_shared(so);
	sb_mtx_lock(&so->so_rcv);

	m = so->so_rcv.sb_mb;
#ifdef SOCKET_SPLICE
	if (isspliced(so))
		m = NULL;
#endif /* SOCKET_SPLICE */
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 * 1. the current count is less than the low water mark,
	 * 2. MSG_WAITALL is set, and it is possible to do the entire
	 *    receive operation at once if we block (resid <= hiwat), or
	 * 3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == NULL && so->so_rcv.sb_cc)
#ifdef SOCKET_SPLICE
			if (!isspliced(so))
#endif /* SOCKET_SPLICE */
				panic("receive 1: so %p, so_type %d, sb_cc %lu",
				    so, so->so_type, so->so_rcv.sb_cc);
#endif
		if ((error2 = READ_ONCE(so->so_error))) {
			if (m)
				goto dontblock;
			error = error2;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else if (so->so_rcv.sb_cc == 0)
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if (flags & MSG_DONTWAIT) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");

		sbunlock(&so->so_rcv);
		error = sbwait(so, &so->so_rcv);
		sb_mtx_unlock(&so->so_rcv);
		if (dosolock)
			sounlock_shared(so);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * From this point onward, we maintain 'nextrecord' as a cache of the
	 * pointer to the next record in the socket buffer.  We must keep the
	 * various socket buffer pointers and local stack versions of the
	 * pointers in sync, pushing out modifications before operations that
	 * may sleep, and re-reading them afterwards.
	 *
	 * Otherwise, we will race with the network stack appending new data
	 * or records onto the socket buffer by using inconsistent/stale
	 * versions of the field, possibly resulting in socket buffer
	 * corruption.
	 */
	if (uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgrcv++;
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(so, &so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = NULL;
				m = so->so_rcv.sb_mb;
			} else {
				so->so_rcv.sb_mb = m_free(m);
				m = so->so_rcv.sb_mb;
			}
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		int skip = 0;
		if (flags & MSG_PEEK) {
			if (mtod(m, struct cmsghdr *)->cmsg_type ==
			    SCM_RIGHTS) {
				/* don't leak internalized SCM_RIGHTS msgs */
				skip = 1;
			} else if (controlp)
				*controlp = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(so, &so->so_rcv, m);
			so->so_rcv.sb_mb = m->m_next;
			m->m_nextpkt = m->m_next = NULL;
			cm = m;
			m = so->so_rcv.sb_mb;
			sbsync(&so->so_rcv, nextrecord);
			if (controlp) {
				if (pr->pr_domain->dom_externalize) {
					sb_mtx_unlock(&so->so_rcv);
					if (dosolock)
						sounlock_shared(so);
					error =
					    (*pr->pr_domain->dom_externalize)
					    (cm, controllen, flags);
					if (dosolock)
						solock_shared(so);
					sb_mtx_lock(&so->so_rcv);
				}
				*controlp = cm;
			} else {
				/*
				 * Dispose of any SCM_RIGHTS message that went
				 * through the read path rather than recv.
				 */
				if (pr->pr_domain->dom_dispose) {
					sb_mtx_unlock(&so->so_rcv);
					pr->pr_domain->dom_dispose(cm);
					sb_mtx_lock(&so->so_rcv);
				}
				m_free(cm);
			}
		}
		if (m != NULL)
			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
		else
			nextrecord = so->so_rcv.sb_mb;
		if (controlp && !skip)
			controlp = &(*controlp)->m_next;
		orig_resid = 0;
	}
	/* If m is non-NULL, we have some data to read. */
	if (m) {
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA) {
			break;
		} else if (m->m_type == MT_CONTROL) {
			/*
			 * If there is more than one control message in the
			 * stream, we do a short read.  The next one can be
			 * received or disposed of by another system call.
			 */
			break;
#ifdef DIAGNOSTIC
		} else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) {
			panic("receive 3: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		}
		so->so_rcv.sb_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == NULL && uio_error == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			resid = uio->uio_resid;
			sb_mtx_unlock(&so->so_rcv);
			if (dosolock)
				sounlock_shared(so);
			uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
			if (dosolock)
				solock_shared(so);
			sb_mtx_lock(&so->so_rcv);
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
				orig_resid = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(so, &so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = NULL;
				} else {
					so->so_rcv.sb_mb = m_free(m);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK) {
				moff += len;
				orig_resid = 0;
			} else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
				so->so_rcv.sb_datacc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_rcv.sb_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_rcv.sb_state & SS_CANTRCVMORE ||
			    so->so_error)
				break;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			if (sbwait(so, &so->so_rcv)) {
				sb_mtx_unlock(&so->so_rcv);
				if (dosolock)
					sounlock_shared(so);
				sbunlock(&so->so_rcv);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}
	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(so, &so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD) {
			sb_mtx_unlock(&so->so_rcv);
			if (!dosolock)
				solock_shared(so);
			pru_rcvd(so);
			if (!dosolock)
				sounlock_shared(so);
			sb_mtx_lock(&so->so_rcv);
		}
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 &&
	    (so->so_rcv.sb_state & SS_CANTRCVMORE) == 0) {
		sb_mtx_unlock(&so->so_rcv);
		sbunlock(&so->so_rcv);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sb_mtx_unlock(&so->so_rcv);
	if (dosolock)
		sounlock_shared(so);
	sbunlock(&so->so_rcv);
	return (error);
}
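/*
 * Illustrative sketch (not part of the original source): the MSG_WAITALL
 * loop in soreceive() above is what lets userland read an exact amount
 * from a stream socket in one call, at the cost of possibly sleeping:
 *
 *	char hdr[8];
 *	ssize_t n;
 *
 *	// Block until all 8 header bytes arrive (or EOF/signal).
 *	n = recv(s, hdr, sizeof(hdr), MSG_WAITALL);
 *
 *	// MSG_PEEK leaves the data in so_rcv for the next reader.
 *	n = recv(s, hdr, sizeof(hdr), MSG_PEEK);
 */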
int
soshutdown(struct socket *so, int how)
{
	int error = 0;

	switch (how) {
	case SHUT_RD:
		sorflush(so);
		break;
	case SHUT_RDWR:
		sorflush(so);
		/* FALLTHROUGH */
	case SHUT_WR:
		solock(so);
		error = pru_shutdown(so);
		sounlock(so);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct mbuf *m;
	const struct protosw *pr = so->so_proto;
	int error;

	error = sblock(sb, SBL_WAIT | SBL_NOINTR);
	/* with SBL_WAIT and SBL_NOINTR sblock() must not fail */
	KASSERT(error == 0);

	solock_shared(so);
	socantrcvmore(so);
	mtx_enter(&sb->sb_mtx);
	m = sb->sb_mb;
	memset(&sb->sb_startzero, 0,
	    (caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero);
	sb->sb_timeo_nsecs = INFSLP;
	mtx_leave(&sb->sb_mtx);
	sounlock_shared(so);
	sbunlock(sb);

	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(m);
	m_purge(m);
}

#ifdef SOCKET_SPLICE

#define so_splicelen	so_sp->ssp_len
#define so_splicemax	so_sp->ssp_max
#define so_idletv	so_sp->ssp_idletv
#define so_idleto	so_sp->ssp_idleto
#define so_splicetask	so_sp->ssp_task

int
sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
{
	struct file *fp;
	struct socket *sosp;
	struct taskq *tq;
	int error = 0;

	if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
		return (EPROTONOSUPPORT);
	if (max && max < 0)
		return (EINVAL);
	if (tv && (tv->tv_sec < 0 || !timerisvalid(tv)))
		return (EINVAL);

	/* If no fd is given, unsplice by removing existing link. */
	if (fd < 0) {
		if ((error = sblock(&so->so_rcv, SBL_WAIT)) != 0)
			return (error);
		solock(so);
		if (so->so_options & SO_ACCEPTCONN) {
			error = EOPNOTSUPP;
			goto out;
		}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto out;
		}

		if (so->so_sp && so->so_sp->ssp_socket)
			sounsplice(so, so->so_sp->ssp_socket, 0);
out:
		sounlock(so);
		sbunlock(&so->so_rcv);
		return (error);
	}

	if (sosplice_taskq == NULL) {
		rw_enter_write(&sosplice_lock);
		if (sosplice_taskq == NULL) {
			tq = taskq_create("sosplice", 1, IPL_SOFTNET,
			    TASKQ_MPSAFE);
			if (tq == NULL) {
				rw_exit_write(&sosplice_lock);
				return (ENOMEM);
			}
			/* Ensure the taskq is fully visible to other CPUs. */
			membar_producer();
			sosplice_taskq = tq;
		}
		rw_exit_write(&sosplice_lock);
	} else {
		/* Ensure the taskq is fully visible on this CPU. */
		membar_consumer();
	}

	/* Find sosp, the drain socket that data will be spliced into. */
	if ((error = getsock(curproc, fd, &fp)) != 0)
		return (error);
	sosp = fp->f_data;

	if (sosp->so_proto->pr_usrreqs->pru_send !=
	    so->so_proto->pr_usrreqs->pru_send) {
		error = EPROTONOSUPPORT;
		goto frele;
	}

	if ((error = sblock(&so->so_rcv, SBL_WAIT)) != 0)
		goto frele;
	if ((error = sblock(&sosp->so_snd, SBL_WAIT)) != 0) {
		sbunlock(&so->so_rcv);
		goto frele;
	}
	solock(so);

	if ((so->so_options & SO_ACCEPTCONN) ||
	    (sosp->so_options & SO_ACCEPTCONN)) {
		error = EOPNOTSUPP;
		goto release;
	}
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
		error = ENOTCONN;
		goto release;
	}
	if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) {
		error = ENOTCONN;
		goto release;
	}
	if (so->so_sp == NULL)
		so->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
	if (sosp->so_sp == NULL)
		sosp->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
	if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
		error = EBUSY;
		goto release;
	}

	so->so_splicelen = 0;
	so->so_splicemax = max;
	if (tv)
		so->so_idletv = *tv;
	else
		timerclear(&so->so_idletv);
	timeout_set_flags(&so->so_idleto, soidle, so,
	    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
	task_set(&so->so_splicetask, sotask, so);

	/*
	 * To prevent sorwakeup() calling somove() before this somove()
	 * has finished, the socket buffers are not marked as spliced yet.
	 */

	/* Splice so and sosp together. */
	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);
	so->so_sp->ssp_socket = sosp;
	sosp->so_sp->ssp_soback = so;
	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0)
		sounlock(so);
	if (somove(so, M_WAIT)) {
		mtx_enter(&so->so_rcv.sb_mtx);
		mtx_enter(&sosp->so_snd.sb_mtx);
		so->so_rcv.sb_flags |= SB_SPLICE;
		sosp->so_snd.sb_flags |= SB_SPLICE;
		mtx_leave(&sosp->so_snd.sb_mtx);
		mtx_leave(&so->so_rcv.sb_mtx);
	}
	if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0)
		solock(so);

release:
	sounlock(so);
	sbunlock(&sosp->so_snd);
	sbunlock(&so->so_rcv);
frele:
	FRELE(fp, curproc);

	return (error);
}
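/*
 * Illustrative sketch (not part of the original source): from userland,
 * sosplice() is reached through setsockopt(2) with SO_SPLICE (see the
 * sosetopt() handler below).  Assuming two connected TCP sockets "from"
 * and "to", splicing and later unsplicing looks like:
 *
 *	struct splice sp;
 *
 *	memset(&sp, 0, sizeof(sp));
 *	sp.sp_fd = to;			// drain socket
 *	sp.sp_max = 0;			// no byte limit
 *	timerclear(&sp.sp_idle);	// no idle timeout
 *	if (setsockopt(from, SOL_SOCKET, SO_SPLICE, &sp, sizeof(sp)) == -1)
 *		err(1, "SO_SPLICE");
 *	...
 *	if (setsockopt(from, SOL_SOCKET, SO_SPLICE, NULL, 0) == -1)
 *		err(1, "unsplice");	// takes the fd < 0 path above
 */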
void
sounsplice(struct socket *so, struct socket *sosp, int freeing)
{
	if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0)
		sbassertlocked(&so->so_rcv);
	soassertlocked(so);

	task_del(sosplice_taskq, &so->so_splicetask);
	timeout_del(&so->so_idleto);

	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);
	so->so_rcv.sb_flags &= ~SB_SPLICE;
	sosp->so_snd.sb_flags &= ~SB_SPLICE;
	so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	/* Do not wakeup a socket that is about to be freed. */
	if ((freeing & SOSP_FREEING_READ) == 0 && soreadable(so))
		sorwakeup(so);
	if ((freeing & SOSP_FREEING_WRITE) == 0 && sowriteable(sosp))
		sowwakeup(sosp);
}

void
soidle(void *arg)
{
	struct socket *so = arg;

	sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
	solock(so);
	/*
	 * Depending on socket type, sblock(&so->so_rcv) or solock()
	 * is always held while modifying SB_SPLICE and
	 * so->so_sp->ssp_socket.
	 */
	if (so->so_rcv.sb_flags & SB_SPLICE) {
		so->so_error = ETIMEDOUT;
		sounsplice(so, so->so_sp->ssp_socket, 0);
	}
	sounlock(so);
	sbunlock(&so->so_rcv);
}

void
sotask(void *arg)
{
	struct socket *so = arg;
	int doyield = 0;
	int sockstream = (so->so_proto->pr_flags & PR_WANTRCVD);

	/*
	 * sblock() on `so_rcv' protects sockets from being unspliced
	 * in the UDP case.  TCP sockets still rely on solock().
	 */

	sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
	if (sockstream)
		solock(so);

	if (so->so_rcv.sb_flags & SB_SPLICE) {
		if (sockstream)
			doyield = 1;
		somove(so, M_DONTWAIT);
	}

	if (sockstream)
		sounlock(so);
	sbunlock(&so->so_rcv);

	if (doyield) {
		/* Avoid user land starvation. */
		yield();
	}
}
/*
 * The socket splicing task or idle timeout may sleep while grabbing the net
 * lock.  As sofree() can be called anytime, sotask() or soidle() could access
 * the socket memory of a freed socket after wakeup.  So delay the pool_put()
 * until all pending socket splicing tasks or timeouts have finished.  Do this
 * by scheduling it on the same threads.
 */
void
soreaper(void *arg)
{
	struct socket *so = arg;

	/* Reuse splice task, sounsplice() has been called before. */
	task_set(&so->so_sp->ssp_task, soput, so);
	task_add(sosplice_taskq, &so->so_sp->ssp_task);
}

void
soput(void *arg)
{
	struct socket *so = arg;

	pool_put(&sosplice_pool, so->so_sp);
	pool_put(&socket_pool, so);
}

/*
 * Move data from receive buffer of spliced source socket to send
 * buffer of drain socket.  Try to move as much as possible in one
 * big chunk.  It is a TCP only implementation.
 * Return value 0 means splicing has been finished, 1 continue.
 */
int
somove(struct socket *so, int wait)
{
	struct socket *sosp = so->so_sp->ssp_socket;
	struct mbuf *m, **mp, *nextrecord;
	u_long len, off, oobmark;
	long space;
	int error = 0, maxreached = 0, unsplice = 0;
	unsigned int rcvstate;
	int sockdgram = ((so->so_proto->pr_flags & PR_WANTRCVD) == 0);

	if (sockdgram)
		sbassertlocked(&so->so_rcv);
	else
		soassertlocked(so);

	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);

nextpkt:
	if ((error = READ_ONCE(so->so_error)))
		goto release;
	if (sosp->so_snd.sb_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto release;
	}

	error = READ_ONCE(sosp->so_error);
	if (error) {
		if (error != ETIMEDOUT && error != EFBIG && error != ELOOP)
			goto release;
		error = 0;
	}
	if ((sosp->so_state & SS_ISCONNECTED) == 0)
		goto release;

	/* Calculate how many bytes can be copied now. */
	len = so->so_rcv.sb_datacc;
	if (so->so_splicemax) {
		KASSERT(so->so_splicelen < so->so_splicemax);
		if (so->so_splicemax <= so->so_splicelen + len) {
			len = so->so_splicemax - so->so_splicelen;
			maxreached = 1;
		}
	}
	space = sbspace_locked(sosp, &sosp->so_snd);
	if (so->so_oobmark && so->so_oobmark < len &&
	    so->so_oobmark < space + 1024)
		space += 1024;
	if (space <= 0) {
		maxreached = 0;
		goto release;
	}
	if (space < len) {
		maxreached = 0;
		if (space < sosp->so_snd.sb_lowat)
			goto release;
		len = space;
	}
	sosp->so_snd.sb_state |= SS_ISSENDING;

	SBLASTRECORDCHK(&so->so_rcv, "somove 1");
	SBLASTMBUFCHK(&so->so_rcv, "somove 1");
	m = so->so_rcv.sb_mb;
	if (m == NULL)
		goto release;
	nextrecord = m->m_nextpkt;

	/* Drop address and control information not used with splicing. */
	if (so->so_proto->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("somove soname: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
#endif
		m = m->m_next;
	}
	while (m && m->m_type == MT_CONTROL)
		m = m->m_next;
	if (m == NULL) {
		sbdroprecord(so, &so->so_rcv);
		if (so->so_proto->pr_flags & PR_WANTRCVD) {
			mtx_leave(&sosp->so_snd.sb_mtx);
			mtx_leave(&so->so_rcv.sb_mtx);
			pru_rcvd(so);
			mtx_enter(&so->so_rcv.sb_mtx);
			mtx_enter(&sosp->so_snd.sb_mtx);
		}
		goto nextpkt;
	}

	/*
	 * By splicing sockets connected to localhost, userland might create a
	 * loop.  Dissolve splicing with error if a loop is detected by the
	 * counter.
	 *
	 * If we deal with a looped broadcast/multicast packet we bail out
	 * with no error to suppress splice termination.
	 */
	if ((m->m_flags & M_PKTHDR) &&
	    ((m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) ||
	    ((m->m_flags & M_LOOP) && (m->m_flags & (M_BCAST|M_MCAST))))) {
		error = ELOOP;
		goto release;
	}

	if (so->so_proto->pr_flags & PR_ATOMIC) {
		if ((m->m_flags & M_PKTHDR) == 0)
			panic("somove !PKTHDR: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
		if (sosp->so_snd.sb_hiwat < m->m_pkthdr.len) {
			error = EMSGSIZE;
			goto release;
		}
		if (len < m->m_pkthdr.len)
			goto release;
		if (m->m_pkthdr.len < len) {
			maxreached = 0;
			len = m->m_pkthdr.len;
		}
		/*
		 * Throw away the name mbuf after it has been assured
		 * that the whole first record can be processed.
		 */
		m = so->so_rcv.sb_mb;
		sbfree(so, &so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		sbsync(&so->so_rcv, nextrecord);
	}
	/*
	 * Throw away the control mbufs after it has been assured
	 * that the whole first record can be processed.
	 */
	m = so->so_rcv.sb_mb;
	while (m && m->m_type == MT_CONTROL) {
		sbfree(so, &so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		m = so->so_rcv.sb_mb;
		sbsync(&so->so_rcv, nextrecord);
	}

	SBLASTRECORDCHK(&so->so_rcv, "somove 2");
	SBLASTMBUFCHK(&so->so_rcv, "somove 2");

	/* Take at most len mbufs out of receive buffer. */
	for (off = 0, mp = &m; off <= len && *mp;
	    off += (*mp)->m_len, mp = &(*mp)->m_next) {
		u_long size = len - off;

#ifdef DIAGNOSTIC
		if ((*mp)->m_type != MT_DATA && (*mp)->m_type != MT_HEADER)
			panic("somove type: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, *mp, (*mp)->m_type);
#endif
		if ((*mp)->m_len > size) {
			/*
			 * Move only a partial mbuf at maximum splice length or
			 * if the drain buffer is too small for this large mbuf.
			 */
			if (!maxreached && sosp->so_snd.sb_datacc > 0) {
				len -= size;
				break;
			}
			*mp = m_copym(so->so_rcv.sb_mb, 0, size, wait);
			if (*mp == NULL) {
				len -= size;
				break;
			}
			so->so_rcv.sb_mb->m_data += size;
			so->so_rcv.sb_mb->m_len -= size;
			so->so_rcv.sb_cc -= size;
			so->so_rcv.sb_datacc -= size;
		} else {
			*mp = so->so_rcv.sb_mb;
			sbfree(so, &so->so_rcv, *mp);
			so->so_rcv.sb_mb = (*mp)->m_next;
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	*mp = NULL;

	SBLASTRECORDCHK(&so->so_rcv, "somove 3");
	SBLASTMBUFCHK(&so->so_rcv, "somove 3");
	SBCHECK(so, &so->so_rcv);
	if (m == NULL)
		goto release;
	m->m_nextpkt = NULL;
	if (m->m_flags & M_PKTHDR) {
		m_resethdr(m);
		m->m_pkthdr.len = len;
	}

	/* Send window update to source peer as receive buffer has changed. */
	if (so->so_proto->pr_flags & PR_WANTRCVD) {
		mtx_leave(&sosp->so_snd.sb_mtx);
		mtx_leave(&so->so_rcv.sb_mtx);
		pru_rcvd(so);
		mtx_enter(&so->so_rcv.sb_mtx);
		mtx_enter(&sosp->so_snd.sb_mtx);
	}

	/* Receive buffer did shrink by len bytes, adjust oob. */
	rcvstate = so->so_rcv.sb_state;
	so->so_rcv.sb_state &= ~SS_RCVATMARK;
	oobmark = so->so_oobmark;
	so->so_oobmark = oobmark > len ? oobmark - len : 0;
	if (oobmark) {
		if (oobmark == len)
			so->so_rcv.sb_state |= SS_RCVATMARK;
		if (oobmark >= len)
			oobmark = 0;
	}

	/*
	 * Handle oob data.  If any malloc fails, ignore error.
	 * TCP urgent data is not very reliable anyway.
	 */
	while (((rcvstate & SS_RCVATMARK) || oobmark) &&
	    (so->so_options & SO_OOBINLINE)) {
		struct mbuf *o = NULL;

		if (rcvstate & SS_RCVATMARK) {
			o = m_get(wait, MT_DATA);
			rcvstate &= ~SS_RCVATMARK;
		} else if (oobmark) {
			o = m_split(m, oobmark, wait);
			if (o) {
				mtx_leave(&sosp->so_snd.sb_mtx);
				mtx_leave(&so->so_rcv.sb_mtx);
				error = pru_send(sosp, m, NULL, NULL);
				mtx_enter(&so->so_rcv.sb_mtx);
				mtx_enter(&sosp->so_snd.sb_mtx);

				if (error) {
					if (sosp->so_snd.sb_state &
					    SS_CANTSENDMORE)
						error = EPIPE;
					m_freem(o);
					goto release;
				}
				len -= oobmark;
				so->so_splicelen += oobmark;
				m = o;
				o = m_get(wait, MT_DATA);
			}
			oobmark = 0;
		}
		if (o) {
			o->m_len = 1;
			*mtod(o, caddr_t) = *mtod(m, caddr_t);

			mtx_leave(&sosp->so_snd.sb_mtx);
			mtx_leave(&so->so_rcv.sb_mtx);
			error = pru_sendoob(sosp, o, NULL, NULL);
			mtx_enter(&so->so_rcv.sb_mtx);
			mtx_enter(&sosp->so_snd.sb_mtx);

			if (error) {
				if (sosp->so_snd.sb_state & SS_CANTSENDMORE)
					error = EPIPE;
				m_freem(m);
				goto release;
			}
			len -= 1;
			so->so_splicelen += 1;
			if (oobmark) {
				oobmark -= 1;
				if (oobmark == 0)
					rcvstate |= SS_RCVATMARK;
			}
			m_adj(m, 1);
		}
	}

	/* Append all remaining data to drain socket. */
	if (so->so_rcv.sb_cc == 0 || maxreached)
		sosp->so_snd.sb_state &= ~SS_ISSENDING;

	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	if (sockdgram)
		solock_shared(sosp);
	error = pru_send(sosp, m, NULL, NULL);
	if (sockdgram)
		sounlock_shared(sosp);

	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);

	if (error) {
		if (sosp->so_snd.sb_state & SS_CANTSENDMORE ||
		    sosp->so_pcb == NULL)
			error = EPIPE;
		goto release;
	}
	so->so_splicelen += len;

	/* Move several packets if possible. */
	if (!maxreached && nextrecord)
		goto nextpkt;

release:
	sosp->so_snd.sb_state &= ~SS_ISSENDING;

	if (!error && maxreached && so->so_splicemax == so->so_splicelen)
		error = EFBIG;
	if (error)
		WRITE_ONCE(so->so_error, error);

	if (((so->so_rcv.sb_state & SS_CANTRCVMORE) &&
	    so->so_rcv.sb_cc == 0) ||
	    (sosp->so_snd.sb_state & SS_CANTSENDMORE) ||
	    maxreached || error)
		unsplice = 1;

	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	if (unsplice) {
		if (sockdgram)
			solock(so);
		sounsplice(so, sosp, 0);
		if (sockdgram)
			sounlock(so);

		return (0);
	}
	if (timerisset(&so->so_idletv))
		timeout_add_tv(&so->so_idleto, &so->so_idletv);
	return (1);
}
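/*
 * Illustrative note (not part of the original source): somove() reports
 * its terminal conditions through so_error on the source socket.  When
 * the sp_max limit set via SO_SPLICE is reached, splicing dissolves and
 * a later getsockopt(SO_ERROR) on the source returns EFBIG; an idle
 * timeout set via sp_idle surfaces as ETIMEDOUT (see soidle() above).
 */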
#endif /* SOCKET_SPLICE */

void
sorwakeup(struct socket *so)
{
	if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

#ifdef SOCKET_SPLICE
	if (so->so_proto->pr_flags & PR_SPLICE) {
		sb_mtx_lock(&so->so_rcv);
		if (so->so_rcv.sb_flags & SB_SPLICE)
			task_add(sosplice_taskq, &so->so_splicetask);
		if (isspliced(so)) {
			sb_mtx_unlock(&so->so_rcv);
			return;
		}
		sb_mtx_unlock(&so->so_rcv);
	}
#endif
	sowakeup(so, &so->so_rcv);
	if (so->so_upcall)
		(*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
}

void
sowwakeup(struct socket *so)
{
	if ((so->so_snd.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

#ifdef SOCKET_SPLICE
	if (so->so_proto->pr_flags & PR_SPLICE) {
		sb_mtx_lock(&so->so_snd);
		if (so->so_snd.sb_flags & SB_SPLICE)
			task_add(sosplice_taskq,
			    &so->so_sp->ssp_soback->so_splicetask);
		if (issplicedback(so)) {
			sb_mtx_unlock(&so->so_snd);
			return;
		}
		sb_mtx_unlock(&so->so_snd);
	}
#endif
	sowakeup(so, &so->so_snd);
}

int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
	int error = 0;

	if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			solock(so);
			error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
			    level, optname, m);
			sounlock(so);
			return (error);
		}
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX)
				return (EINVAL);

			solock(so);
			so->so_linger = mtod(m, struct linger *)->l_linger;
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			sounlock(so);

			break;
		case SO_BINDANY:
			if ((error = suser(curproc)) != 0)	/* XXX */
				return (error);
			/* FALLTHROUGH */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_ZEROIZE:
			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);

			solock(so);
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			sounlock(so);

			break;
		case SO_DONTROUTE:
			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);
			if (*mtod(m, int *))
				error = EOPNOTSUPP;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			struct sockbuf *sb = (optname == SO_SNDBUF ||
			    optname == SO_SNDLOWAT ?
			    &so->so_snd : &so->so_rcv);
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);
			cnt = *mtod(m, int *);
			if ((long)cnt <= 0)
				cnt = 1;

			if (((sb->sb_flags & SB_MTXLOCK) == 0))
				solock(so);
			mtx_enter(&sb->sb_mtx);

			switch (optname) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sb->sb_state &
				    (SS_CANTSENDMORE | SS_CANTRCVMORE)) {
					error = EINVAL;
					break;
				}
				if (sbcheckreserve(cnt, sb->sb_wat) ||
				    sbreserve(so, sb, cnt)) {
					error = ENOBUFS;
					break;
				}
				sb->sb_wat = cnt;
				break;
			case SO_SNDLOWAT:
			case SO_RCVLOWAT:
				sb->sb_lowat = (cnt > sb->sb_hiwat) ?
				    sb->sb_hiwat : cnt;
				break;
			}

			mtx_leave(&sb->sb_mtx);
			if (((sb->sb_flags & SB_MTXLOCK) == 0))
				sounlock(so);

			break;
		    }
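		/*
		 * Illustrative sketch (not part of the original source):
		 * the SO_SNDBUF/SO_RCVBUF path above rejects resizing once
		 * the buffer side has been shut down and checks the new
		 * reservation against the watermark.  From userland:
		 *
		 *	int sz = 256 * 1024;
		 *
		 *	if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &sz,
		 *	    sizeof(sz)) == -1)
		 *		warn("SO_RCVBUF");	// e.g. ENOBUFS if too big
		 */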
		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct sockbuf *sb = (optname == SO_SNDTIMEO ?
			    &so->so_snd : &so->so_rcv);
			struct timeval tv;
			uint64_t nsecs;

			if (m == NULL || m->m_len < sizeof (tv))
				return (EINVAL);
			memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
			if (!timerisvalid(&tv))
				return (EINVAL);
			nsecs = TIMEVAL_TO_NSEC(&tv);
			if (nsecs == UINT64_MAX)
				return (EDOM);
			if (nsecs == 0)
				nsecs = INFSLP;

			mtx_enter(&sb->sb_mtx);
			sb->sb_timeo_nsecs = nsecs;
			mtx_leave(&sb->sb_mtx);
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				const struct domain *dom =
				    so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				solock(so);
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_SETOPT, so, level, optname, m);
				sounlock(so);
			} else
				error = ENOPROTOOPT;
			break;
#ifdef SOCKET_SPLICE
		case SO_SPLICE:
			if (m == NULL) {
				error = sosplice(so, -1, 0, NULL);
			} else if (m->m_len < sizeof(int)) {
				error = EINVAL;
			} else if (m->m_len < sizeof(struct splice)) {
				error = sosplice(so, *mtod(m, int *), 0, NULL);
			} else {
				error = sosplice(so,
				    mtod(m, struct splice *)->sp_fd,
				    mtod(m, struct splice *)->sp_max,
				    &mtod(m, struct splice *)->sp_idle);
			}
			break;
#endif /* SOCKET_SPLICE */

		default:
			error = ENOPROTOOPT;
			break;
		}
	}

	return (error);
}
int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
        int error = 0;

        if (level != SOL_SOCKET) {
                if (so->so_proto->pr_ctloutput) {
                        solock(so);
                        error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
                            level, optname, m);
                        sounlock(so);
                        return (error);
                }
                error = ENOPROTOOPT;
        } else {
                switch (optname) {

                case SO_LINGER:
                        if (m == NULL || m->m_len != sizeof (struct linger) ||
                            mtod(m, struct linger *)->l_linger < 0 ||
                            mtod(m, struct linger *)->l_linger > SHRT_MAX)
                                return (EINVAL);

                        solock(so);
                        so->so_linger = mtod(m, struct linger *)->l_linger;
                        if (*mtod(m, int *))
                                so->so_options |= optname;
                        else
                                so->so_options &= ~optname;
                        sounlock(so);

                        break;
                case SO_BINDANY:
                        if ((error = suser(curproc)) != 0)      /* XXX */
                                return (error);
                        /* FALLTHROUGH */

                case SO_DEBUG:
                case SO_KEEPALIVE:
                case SO_USELOOPBACK:
                case SO_BROADCAST:
                case SO_REUSEADDR:
                case SO_REUSEPORT:
                case SO_OOBINLINE:
                case SO_TIMESTAMP:
                case SO_ZEROIZE:
                        if (m == NULL || m->m_len < sizeof (int))
                                return (EINVAL);

                        solock(so);
                        if (*mtod(m, int *))
                                so->so_options |= optname;
                        else
                                so->so_options &= ~optname;
                        sounlock(so);

                        break;
                case SO_DONTROUTE:
                        if (m == NULL || m->m_len < sizeof (int))
                                return (EINVAL);
                        if (*mtod(m, int *))
                                error = EOPNOTSUPP;
                        break;

                case SO_SNDBUF:
                case SO_RCVBUF:
                case SO_SNDLOWAT:
                case SO_RCVLOWAT:
                    {
                        struct sockbuf *sb = (optname == SO_SNDBUF ||
                            optname == SO_SNDLOWAT ?
                            &so->so_snd : &so->so_rcv);
                        u_long cnt;

                        if (m == NULL || m->m_len < sizeof (int))
                                return (EINVAL);
                        cnt = *mtod(m, int *);
                        if ((long)cnt <= 0)
                                cnt = 1;

                        if ((sb->sb_flags & SB_MTXLOCK) == 0)
                                solock(so);
                        mtx_enter(&sb->sb_mtx);

                        switch (optname) {
                        case SO_SNDBUF:
                        case SO_RCVBUF:
                                if (sb->sb_state &
                                    (SS_CANTSENDMORE | SS_CANTRCVMORE)) {
                                        error = EINVAL;
                                        break;
                                }
                                if (sbcheckreserve(cnt, sb->sb_wat) ||
                                    sbreserve(so, sb, cnt)) {
                                        error = ENOBUFS;
                                        break;
                                }
                                sb->sb_wat = cnt;
                                break;
                        case SO_SNDLOWAT:
                        case SO_RCVLOWAT:
                                sb->sb_lowat = (cnt > sb->sb_hiwat) ?
                                    sb->sb_hiwat : cnt;
                                break;
                        }

                        mtx_leave(&sb->sb_mtx);
                        if ((sb->sb_flags & SB_MTXLOCK) == 0)
                                sounlock(so);

                        break;
                    }

                case SO_SNDTIMEO:
                case SO_RCVTIMEO:
                    {
                        struct sockbuf *sb = (optname == SO_SNDTIMEO ?
                            &so->so_snd : &so->so_rcv);
                        struct timeval tv;
                        uint64_t nsecs;

                        if (m == NULL || m->m_len < sizeof (tv))
                                return (EINVAL);
                        memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
                        if (!timerisvalid(&tv))
                                return (EINVAL);
                        nsecs = TIMEVAL_TO_NSEC(&tv);
                        if (nsecs == UINT64_MAX)
                                return (EDOM);
                        if (nsecs == 0)
                                nsecs = INFSLP;

                        mtx_enter(&sb->sb_mtx);
                        sb->sb_timeo_nsecs = nsecs;
                        mtx_leave(&sb->sb_mtx);
                        break;
                    }

                case SO_RTABLE:
                        if (so->so_proto->pr_domain &&
                            so->so_proto->pr_domain->dom_protosw &&
                            so->so_proto->pr_ctloutput) {
                                const struct domain *dom =
                                    so->so_proto->pr_domain;

                                level = dom->dom_protosw->pr_protocol;
                                solock(so);
                                error = (*so->so_proto->pr_ctloutput)
                                    (PRCO_SETOPT, so, level, optname, m);
                                sounlock(so);
                        } else
                                error = ENOPROTOOPT;
                        break;
#ifdef SOCKET_SPLICE
                case SO_SPLICE:
                        if (m == NULL) {
                                error = sosplice(so, -1, 0, NULL);
                        } else if (m->m_len < sizeof(int)) {
                                error = EINVAL;
                        } else if (m->m_len < sizeof(struct splice)) {
                                error = sosplice(so, *mtod(m, int *), 0, NULL);
                        } else {
                                error = sosplice(so,
                                    mtod(m, struct splice *)->sp_fd,
                                    mtod(m, struct splice *)->sp_max,
                                    &mtod(m, struct splice *)->sp_idle);
                        }
                        break;
#endif /* SOCKET_SPLICE */

                default:
                        error = ENOPROTOOPT;
                        break;
                }
        }

        return (error);
}
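/*
 * Illustrative sketch (assumption: userland code, not part of this file):
 * splicing socket "from" into socket "to" with a 1 MB transfer cap and a
 * 5 second idle timeout, matching the SO_SPLICE case handled above.
 *
 *	struct splice sp = {
 *		.sp_fd = to,
 *		.sp_max = 1024 * 1024,
 *		.sp_idle = { .tv_sec = 5, .tv_usec = 0 },
 *	};
 *
 *	if (setsockopt(from, SOL_SOCKET, SO_SPLICE, &sp, sizeof(sp)) == -1)
 *		err(1, "SO_SPLICE");
 *
 * Per the cases above, an option value at least an int but shorter than
 * struct splice is treated as a bare descriptor, and no option value at
 * all maps to sosplice(so, -1, ...), i.e. a request to dissolve the splice.
 */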
int
sogetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
        int error = 0;

        if (level != SOL_SOCKET) {
                if (so->so_proto->pr_ctloutput) {
                        m->m_len = 0;

                        solock(so);
                        error = (*so->so_proto->pr_ctloutput)(PRCO_GETOPT, so,
                            level, optname, m);
                        sounlock(so);
                        return (error);
                } else
                        return (ENOPROTOOPT);
        } else {
                m->m_len = sizeof (int);

                switch (optname) {

                case SO_LINGER:
                        m->m_len = sizeof (struct linger);
                        solock_shared(so);
                        mtod(m, struct linger *)->l_onoff =
                            so->so_options & SO_LINGER;
                        mtod(m, struct linger *)->l_linger = so->so_linger;
                        sounlock_shared(so);
                        break;

                case SO_BINDANY:
                case SO_USELOOPBACK:
                case SO_DEBUG:
                case SO_KEEPALIVE:
                case SO_REUSEADDR:
                case SO_REUSEPORT:
                case SO_BROADCAST:
                case SO_OOBINLINE:
                case SO_ACCEPTCONN:
                case SO_TIMESTAMP:
                case SO_ZEROIZE:
                        *mtod(m, int *) = so->so_options & optname;
                        break;

                case SO_DONTROUTE:
                        *mtod(m, int *) = 0;
                        break;

                case SO_TYPE:
                        *mtod(m, int *) = so->so_type;
                        break;

                case SO_ERROR:
                        solock(so);
                        *mtod(m, int *) = so->so_error;
                        so->so_error = 0;
                        sounlock(so);

                        break;

                case SO_DOMAIN:
                        *mtod(m, int *) = so->so_proto->pr_domain->dom_family;
                        break;

                case SO_PROTOCOL:
                        *mtod(m, int *) = so->so_proto->pr_protocol;
                        break;

                case SO_SNDBUF:
                        *mtod(m, int *) = so->so_snd.sb_hiwat;
                        break;

                case SO_RCVBUF:
                        *mtod(m, int *) = so->so_rcv.sb_hiwat;
                        break;

                case SO_SNDLOWAT:
                        *mtod(m, int *) = so->so_snd.sb_lowat;
                        break;

                case SO_RCVLOWAT:
                        *mtod(m, int *) = so->so_rcv.sb_lowat;
                        break;

                case SO_SNDTIMEO:
                case SO_RCVTIMEO:
                    {
                        struct sockbuf *sb = (optname == SO_SNDTIMEO ?
                            &so->so_snd : &so->so_rcv);
                        struct timeval tv;
                        uint64_t nsecs;

                        mtx_enter(&sb->sb_mtx);
                        nsecs = sb->sb_timeo_nsecs;
                        mtx_leave(&sb->sb_mtx);

                        m->m_len = sizeof(struct timeval);
                        memset(&tv, 0, sizeof(tv));
                        if (nsecs != INFSLP)
                                NSEC_TO_TIMEVAL(nsecs, &tv);
                        memcpy(mtod(m, struct timeval *), &tv, sizeof tv);
                        break;
                    }

                case SO_RTABLE:
                        if (so->so_proto->pr_domain &&
                            so->so_proto->pr_domain->dom_protosw &&
                            so->so_proto->pr_ctloutput) {
                                const struct domain *dom =
                                    so->so_proto->pr_domain;

                                level = dom->dom_protosw->pr_protocol;
                                solock(so);
                                error = (*so->so_proto->pr_ctloutput)
                                    (PRCO_GETOPT, so, level, optname, m);
                                sounlock(so);
                                if (error)
                                        return (error);
                                break;
                        }
                        return (ENOPROTOOPT);

#ifdef SOCKET_SPLICE
                case SO_SPLICE:
                    {
                        off_t len;

                        m->m_len = sizeof(off_t);
                        solock_shared(so);
                        len = so->so_sp ? so->so_sp->ssp_len : 0;
                        sounlock_shared(so);
                        memcpy(mtod(m, off_t *), &len, sizeof(off_t));
                        break;
                    }
#endif /* SOCKET_SPLICE */

                case SO_PEERCRED:
                        if (so->so_proto->pr_protocol == AF_UNIX) {
                                struct unpcb *unp = sotounpcb(so);

                                solock(so);
                                if (unp->unp_flags & UNP_FEIDS) {
                                        m->m_len = sizeof(unp->unp_connid);
                                        memcpy(mtod(m, caddr_t),
                                            &(unp->unp_connid), m->m_len);
                                        sounlock(so);
                                        break;
                                }
                                sounlock(so);

                                return (ENOTCONN);
                        }
                        return (EOPNOTSUPP);

                default:
                        return (ENOPROTOOPT);
                }
                return (0);
        }
}
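/*
 * Illustrative sketch (assumption: userland code, not part of this file):
 * querying how many bytes have been moved across a splice so far, matching
 * the SO_SPLICE case in sogetopt() above, which returns ssp_len as an off_t.
 *
 *	off_t moved;
 *	socklen_t len = sizeof(moved);
 *
 *	if (getsockopt(from, SOL_SOCKET, SO_SPLICE, &moved, &len) == -1)
 *		err(1, "SO_SPLICE");
 *	printf("%lld bytes spliced\n", (long long)moved);
 */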
void
sohasoutofband(struct socket *so)
{
        pgsigio(&so->so_sigio, SIGURG, 0);
        knote(&so->so_rcv.sb_klist, 0);
}

void
sofilt_lock(struct socket *so, struct sockbuf *sb)
{
        switch (so->so_proto->pr_domain->dom_family) {
        case PF_INET:
        case PF_INET6:
                NET_LOCK_SHARED();
                break;
        default:
                rw_enter_write(&so->so_lock);
                break;
        }

        mtx_enter(&sb->sb_mtx);
}

void
sofilt_unlock(struct socket *so, struct sockbuf *sb)
{
        mtx_leave(&sb->sb_mtx);

        switch (so->so_proto->pr_domain->dom_family) {
        case PF_INET:
        case PF_INET6:
                NET_UNLOCK_SHARED();
                break;
        default:
                rw_exit_write(&so->so_lock);
                break;
        }
}

int
soo_kqfilter(struct file *fp, struct knote *kn)
{
        struct socket *so = kn->kn_fp->f_data;
        struct sockbuf *sb;

        switch (kn->kn_filter) {
        case EVFILT_READ:
                kn->kn_fop = &soread_filtops;
                sb = &so->so_rcv;
                break;
        case EVFILT_WRITE:
                kn->kn_fop = &sowrite_filtops;
                sb = &so->so_snd;
                break;
        case EVFILT_EXCEPT:
                kn->kn_fop = &soexcept_filtops;
                sb = &so->so_rcv;
                break;
        default:
                return (EINVAL);
        }

        klist_insert(&sb->sb_klist, kn);

        return (0);
}

void
filt_sordetach(struct knote *kn)
{
        struct socket *so = kn->kn_fp->f_data;

        klist_remove(&so->so_rcv.sb_klist, kn);
}

int
filt_soread(struct knote *kn, long hint)
{
        struct socket *so = kn->kn_fp->f_data;
        u_int state = READ_ONCE(so->so_state);
        u_int error = READ_ONCE(so->so_error);
        int rv = 0;

        MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
        if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
                soassertlocked_readonly(so);

        if (so->so_options & SO_ACCEPTCONN) {
                short qlen = READ_ONCE(so->so_qlen);

                if (so->so_rcv.sb_flags & SB_MTXLOCK)
                        soassertlocked_readonly(so);

                kn->kn_data = qlen;
                rv = (kn->kn_data != 0);

                if (kn->kn_flags & (__EV_POLL | __EV_SELECT)) {
                        if (state & SS_ISDISCONNECTED) {
                                kn->kn_flags |= __EV_HUP;
                                rv = 1;
                        } else {
                                rv = qlen || soreadable(so);
                        }
                }

                return rv;
        }

        kn->kn_data = so->so_rcv.sb_cc;
#ifdef SOCKET_SPLICE
        if (isspliced(so)) {
                rv = 0;
        } else
#endif /* SOCKET_SPLICE */
        if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
                kn->kn_flags |= EV_EOF;
                if (kn->kn_flags & __EV_POLL) {
                        if (state & SS_ISDISCONNECTED)
                                kn->kn_flags |= __EV_HUP;
                }
                kn->kn_fflags = error;
                rv = 1;
        } else if (error) {
                rv = 1;
        } else if (kn->kn_sfflags & NOTE_LOWAT) {
                rv = (kn->kn_data >= kn->kn_sdata);
        } else {
                rv = (kn->kn_data >= so->so_rcv.sb_lowat);
        }

        return rv;
}
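/*
 * Illustrative sketch (assumption: userland code, not part of this file):
 * registering the read filter above with a low watermark, so the knote
 * only fires once at least 512 bytes are buffered on socket "s".
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, s, EVFILT_READ, EV_ADD, NOTE_LOWAT, 512, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 * Without NOTE_LOWAT, filt_soread() falls back to the socket's sb_lowat,
 * i.e. the SO_RCVLOWAT setting.
 */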
void
filt_sowdetach(struct knote *kn)
{
        struct socket *so = kn->kn_fp->f_data;

        klist_remove(&so->so_snd.sb_klist, kn);
}

int
filt_sowrite(struct knote *kn, long hint)
{
        struct socket *so = kn->kn_fp->f_data;
        u_int state = READ_ONCE(so->so_state);
        u_int error = READ_ONCE(so->so_error);
        int rv;

        MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx);
        if ((so->so_snd.sb_flags & SB_MTXLOCK) == 0)
                soassertlocked_readonly(so);

        kn->kn_data = sbspace_locked(so, &so->so_snd);
        if (so->so_snd.sb_state & SS_CANTSENDMORE) {
                kn->kn_flags |= EV_EOF;
                if (kn->kn_flags & __EV_POLL) {
                        if (state & SS_ISDISCONNECTED)
                                kn->kn_flags |= __EV_HUP;
                }
                kn->kn_fflags = error;
                rv = 1;
        } else if (error) {
                rv = 1;
        } else if (((state & SS_ISCONNECTED) == 0) &&
            (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
                rv = 0;
        } else if (kn->kn_sfflags & NOTE_LOWAT) {
                rv = (kn->kn_data >= kn->kn_sdata);
        } else {
                rv = (kn->kn_data >= so->so_snd.sb_lowat);
        }

        return (rv);
}

int
filt_soexcept(struct knote *kn, long hint)
{
        struct socket *so = kn->kn_fp->f_data;
        int rv = 0;

        MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
        if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
                soassertlocked_readonly(so);

#ifdef SOCKET_SPLICE
        if (isspliced(so)) {
                rv = 0;
        } else
#endif /* SOCKET_SPLICE */
        if (kn->kn_sfflags & NOTE_OOB) {
                if (so->so_oobmark || (so->so_rcv.sb_state & SS_RCVATMARK)) {
                        kn->kn_fflags |= NOTE_OOB;
                        kn->kn_data -= so->so_oobmark;
                        rv = 1;
                }
        }

        if (kn->kn_flags & __EV_POLL) {
                u_int state = READ_ONCE(so->so_state);

                if (state & SS_ISDISCONNECTED) {
                        kn->kn_flags |= __EV_HUP;
                        rv = 1;
                }
        }

        return rv;
}

int
filt_sowmodify(struct kevent *kev, struct knote *kn)
{
        struct socket *so = kn->kn_fp->f_data;
        int rv;

        sofilt_lock(so, &so->so_snd);
        rv = knote_modify(kev, kn);
        sofilt_unlock(so, &so->so_snd);

        return (rv);
}

int
filt_sowprocess(struct knote *kn, struct kevent *kev)
{
        struct socket *so = kn->kn_fp->f_data;
        int rv;

        sofilt_lock(so, &so->so_snd);
        rv = knote_process(kn, kev);
        sofilt_unlock(so, &so->so_snd);

        return (rv);
}

int
filt_sormodify(struct kevent *kev, struct knote *kn)
{
        struct socket *so = kn->kn_fp->f_data;
        int rv;

        sofilt_lock(so, &so->so_rcv);
        rv = knote_modify(kev, kn);
        sofilt_unlock(so, &so->so_rcv);

        return (rv);
}

int
filt_sorprocess(struct knote *kn, struct kevent *kev)
{
        struct socket *so = kn->kn_fp->f_data;
        int rv;

        sofilt_lock(so, &so->so_rcv);
        rv = knote_process(kn, kev);
        sofilt_unlock(so, &so->so_rcv);

        return (rv);
}
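/*
 * Illustrative sketch (assumption: userland code, not part of this file):
 * watching for TCP urgent data with the except filter above, on an
 * already-created kqueue "kq" and socket "s".
 *
 *	struct kevent kev;
 *
 *	EV_SET(&kev, s, EVFILT_EXCEPT, EV_ADD, NOTE_OOB, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 * filt_soexcept() reports NOTE_OOB once the socket has an out-of-band
 * mark pending (so_oobmark set or SS_RCVATMARK reached).
 */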
#ifdef DDB
void
sobuf_print(struct sockbuf *,
    int (*)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))));

void
sobuf_print(struct sockbuf *sb,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
        (*pr)("\tsb_cc: %lu\n", sb->sb_cc);
        (*pr)("\tsb_datacc: %lu\n", sb->sb_datacc);
        (*pr)("\tsb_hiwat: %lu\n", sb->sb_hiwat);
        (*pr)("\tsb_wat: %lu\n", sb->sb_wat);
        (*pr)("\tsb_mbcnt: %lu\n", sb->sb_mbcnt);
        (*pr)("\tsb_mbmax: %lu\n", sb->sb_mbmax);
        (*pr)("\tsb_lowat: %ld\n", sb->sb_lowat);
        (*pr)("\tsb_mb: %p\n", sb->sb_mb);
        (*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
        (*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
        (*pr)("\tsb_flags: %04x\n", sb->sb_flags);
        (*pr)("\tsb_state: %04x\n", sb->sb_state);
        (*pr)("\tsb_timeo_nsecs: %llu\n", sb->sb_timeo_nsecs);
}

void
so_print(void *v,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
        struct socket *so = v;

        (*pr)("socket %p\n", so);
        (*pr)("so_type: %i\n", so->so_type);
        (*pr)("so_options: 0x%04x\n", so->so_options); /* %b */
        (*pr)("so_linger: %i\n", so->so_linger);
        (*pr)("so_state: 0x%04x\n", so->so_state);
        (*pr)("so_pcb: %p\n", so->so_pcb);
        (*pr)("so_proto: %p\n", so->so_proto);
        (*pr)("so_sigio: %p\n", so->so_sigio.sir_sigio);

        (*pr)("so_head: %p\n", so->so_head);
        (*pr)("so_onq: %p\n", so->so_onq);
        (*pr)("so_q0: @%p first: %p\n", &so->so_q0, TAILQ_FIRST(&so->so_q0));
        (*pr)("so_q: @%p first: %p\n", &so->so_q, TAILQ_FIRST(&so->so_q));
        (*pr)("so_eq: next: %p\n", TAILQ_NEXT(so, so_qe));
        (*pr)("so_q0len: %i\n", so->so_q0len);
        (*pr)("so_qlen: %i\n", so->so_qlen);
        (*pr)("so_qlimit: %i\n", so->so_qlimit);
        (*pr)("so_timeo: %i\n", so->so_timeo);
        (*pr)("so_oobmark: %lu\n", so->so_oobmark);

        (*pr)("so_sp: %p\n", so->so_sp);
        if (so->so_sp != NULL) {
                (*pr)("\tssp_socket: %p\n", so->so_sp->ssp_socket);
                (*pr)("\tssp_soback: %p\n", so->so_sp->ssp_soback);
                (*pr)("\tssp_len: %lld\n",
                    (unsigned long long)so->so_sp->ssp_len);
                (*pr)("\tssp_max: %lld\n",
                    (unsigned long long)so->so_sp->ssp_max);
                (*pr)("\tssp_idletv: %lld %ld\n", so->so_sp->ssp_idletv.tv_sec,
                    so->so_sp->ssp_idletv.tv_usec);
                (*pr)("\tssp_idleto: %spending (@%i)\n",
                    timeout_pending(&so->so_sp->ssp_idleto) ? "" : "not ",
                    so->so_sp->ssp_idleto.to_time);
        }

        (*pr)("so_rcv:\n");
        sobuf_print(&so->so_rcv, pr);
        (*pr)("so_snd:\n");
        sobuf_print(&so->so_snd, pr);

        (*pr)("so_upcall: %p so_upcallarg: %p\n",
            so->so_upcall, so->so_upcallarg);

        (*pr)("so_euid: %d so_ruid: %d\n", so->so_euid, so->so_ruid);
        (*pr)("so_egid: %d so_rgid: %d\n", so->so_egid, so->so_rgid);
        (*pr)("so_cpid: %d\n", so->so_cpid);
}
#endif