/*	$OpenBSD: uipc_socket.c,v 1.344 2024/10/31 12:51:55 claudio Exp $	*/
/*	$NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/unpcb.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/pool.h>
#include <sys/atomic.h>
#include <sys/rwlock.h>
#include <sys/time.h>
#include <sys/refcnt.h>

#ifdef DDB
#include <machine/db_machdep.h>
#endif

void	sbsync(struct sockbuf *, struct mbuf *);

int	sosplice(struct socket *, int, off_t, struct timeval *);
void	sounsplice(struct socket *, struct socket *, int);
void	soidle(void *);
void	sotask(void *);
void	soreaper(void *);
void	soput(void *);
int	somove(struct socket *, int);
void	sorflush(struct socket *);

void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_soexcept(struct knote *kn, long hint);

int	filt_sowmodify(struct kevent *kev, struct knote *kn);
int	filt_sowprocess(struct knote *kn, struct kevent *kev);

int	filt_sormodify(struct kevent *kev, struct knote *kn);
int	filt_sorprocess(struct knote *kn, struct kevent *kev);

const struct filterops soread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_sordetach,
	.f_event	= filt_soread,
	.f_modify	= filt_sormodify,
	.f_process	= filt_sorprocess,
};

const struct filterops sowrite_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_sowdetach,
	.f_event	= filt_sowrite,
	.f_modify	= filt_sowmodify,
	.f_process	= filt_sowprocess,
};

const struct filterops soexcept_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_sordetach,
	.f_event	= filt_soexcept,
	.f_modify	= filt_sormodify,
	.f_process	= filt_sorprocess,
};

#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

struct pool socket_pool;
#ifdef SOCKET_SPLICE
struct pool sosplice_pool;
struct taskq *sosplice_taskq;
struct rwlock sosplice_lock = RWLOCK_INITIALIZER("sosplicelk");
#endif

void
soinit(void)
{
	pool_init(&socket_pool, sizeof(struct socket), 0, IPL_SOFTNET, 0,
	    "sockpl", NULL);
#ifdef SOCKET_SPLICE
	pool_init(&sosplice_pool, sizeof(struct sosplice), 0, IPL_SOFTNET, 0,
	    "sosppl", NULL);
#endif
}

struct socket *
soalloc(const struct protosw *prp, int wait)
{
	const struct domain *dp = prp->pr_domain;
	struct socket *so;

	so = pool_get(&socket_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
	    PR_ZERO);
	if (so == NULL)
		return (NULL);
	rw_init_flags(&so->so_lock, dp->dom_name, RWL_DUPOK);
	refcnt_init(&so->so_refcnt);
	rw_init(&so->so_rcv.sb_lock, "sbufrcv");
	rw_init(&so->so_snd.sb_lock, "sbufsnd");
	mtx_init_flags(&so->so_rcv.sb_mtx, IPL_MPFLOOR, "sbrcv", 0);
	mtx_init_flags(&so->so_snd.sb_mtx, IPL_MPFLOOR, "sbsnd", 0);
	klist_init_mutex(&so->so_rcv.sb_klist, &so->so_rcv.sb_mtx);
	klist_init_mutex(&so->so_snd.sb_klist, &so->so_snd.sb_mtx);
	sigio_init(&so->so_sigio);
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);

	switch (dp->dom_family) {
	case AF_INET:
	case AF_INET6:
		switch (prp->pr_type) {
		case SOCK_RAW:
		case SOCK_DGRAM:
			so->so_snd.sb_flags |= SB_MTXLOCK;
			so->so_rcv.sb_flags |= SB_MTXLOCK;
			break;
		}
		break;
	case AF_KEY:
	case AF_ROUTE:
	case AF_UNIX:
		so->so_snd.sb_flags |= SB_MTXLOCK;
		so->so_rcv.sb_flags |= SB_MTXLOCK;
		break;
	}

	return (so);
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p = curproc;		/* XXX */
	const struct protosw *prp;
	struct socket *so;
	int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == NULL || prp->pr_usrreqs == NULL)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(prp, M_WAIT);
	so->so_type = type;
	if (suser(p) == 0)
		so->so_state = SS_PRIV;
	so->so_ruid = p->p_ucred->cr_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_rgid = p->p_ucred->cr_rgid;
	so->so_egid = p->p_ucred->cr_gid;
	so->so_cpid = p->p_p->ps_pid;
	so->so_proto = prp;
	so->so_snd.sb_timeo_nsecs = INFSLP;
	so->so_rcv.sb_timeo_nsecs = INFSLP;

	solock(so);
	error = pru_attach(so, proto, M_WAIT);
	if (error) {
		so->so_state |= SS_NOFDREF;
		/* sofree() calls sounlock(). */
		sofree(so, 0);
		return (error);
	}
	sounlock(so);
	*aso = so;
	return (0);
}
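
/*
 * Example (illustrative; `s' is a hypothetical descriptor): a userland
 * call such as
 *
 *	int s = socket(AF_INET, SOCK_STREAM, 0);
 *
 * reaches socreate() via sys_socket() with proto 0, so the protocol
 * switch entry is looked up by type using pffindtype().  An explicit
 * protocol, e.g. IPPROTO_UDP with SOCK_DGRAM, goes through
 * pffindproto() instead, and an entry whose pr_type does not match the
 * requested type fails with EPROTOTYPE.
 */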

int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	soassertlocked(so);
	return pru_bind(so, nam, p);
}

int
solisten(struct socket *so, int backlog)
{
	int somaxconn_local = atomic_load_int(&somaxconn);
	int sominconn_local = atomic_load_int(&sominconn);
	int error;

	switch (so->so_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		break;
	default:
		return (EOPNOTSUPP);
	}

	soassertlocked(so);

	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
		return (EINVAL);
#ifdef SOCKET_SPLICE
	if (isspliced(so) || issplicedback(so))
		return (EOPNOTSUPP);
#endif /* SOCKET_SPLICE */
	error = pru_listen(so);
	if (error)
		return (error);
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn_local)
		backlog = somaxconn_local;
	if (backlog < sominconn_local)
		backlog = sominconn_local;
	so->so_qlimit = backlog;
	return (0);
}

#define SOSP_FREEING_READ	1
#define SOSP_FREEING_WRITE	2
void
sofree(struct socket *so, int keep_lock)
{
	int persocket = solock_persocket(so);

	soassertlocked(so);

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
		if (!keep_lock)
			sounlock(so);
		return;
	}
	if (so->so_head) {
		struct socket *head = so->so_head;

		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (so->so_onq == &head->so_q) {
			if (!keep_lock)
				sounlock(so);
			return;
		}

		if (persocket) {
			/*
			 * Concurrent close of `head' could
			 * abort `so' due to re-lock.
			 */
			soref(so);
			soref(head);
			sounlock(so);
			solock(head);
			solock(so);

			if (so->so_onq != &head->so_q0) {
				sounlock(head);
				sounlock(so);
				sorele(head);
				sorele(so);
				return;
			}

			sorele(head);
			sorele(so);
		}

		soqremque(so, 0);

		if (persocket)
			sounlock(head);
	}

	switch (so->so_proto->pr_domain->dom_family) {
	case AF_INET:
	case AF_INET6:
		if (so->so_proto->pr_type == SOCK_STREAM)
			break;
		/* FALLTHROUGH */
	default:
		sounlock(so);
		refcnt_finalize(&so->so_refcnt, "sofinal");
		solock(so);
		break;
	}

	sigio_free(&so->so_sigio);
	klist_free(&so->so_rcv.sb_klist);
	klist_free(&so->so_snd.sb_klist);

	mtx_enter(&so->so_snd.sb_mtx);
	sbrelease(so, &so->so_snd);
	mtx_leave(&so->so_snd.sb_mtx);

	/*
	 * Unlocked dispose and cleanup is safe.  Socket is unlinked
	 * from everywhere.  Even concurrent sotask() thread will not
	 * call somove().
	 */
	if (so->so_proto->pr_flags & PR_RIGHTS &&
	    so->so_proto->pr_domain->dom_dispose)
		(*so->so_proto->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
	m_purge(so->so_rcv.sb_mb);

	if (!keep_lock)
		sounlock(so);

#ifdef SOCKET_SPLICE
	if (so->so_sp) {
		/* Reuse splice idle, sounsplice() has been called before. */
		timeout_set_flags(&so->so_sp->ssp_idleto, soreaper, so,
		    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
		timeout_add(&so->so_sp->ssp_idleto, 0);
	} else
#endif /* SOCKET_SPLICE */
	{
		pool_put(&socket_pool, so);
	}
}

static inline uint64_t
solinger_nsec(struct socket *so)
{
	if (so->so_linger == 0)
		return INFSLP;

	return SEC_TO_NSEC(so->so_linger);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so, int flags)
{
	struct socket *so2;
	int error = 0;

	solock(so);
	/* Revoke async IO early. There is a final revocation in sofree(). */
	sigio_free(&so->so_sigio);
	if (so->so_state & SS_ISCONNECTED) {
		if (so->so_pcb == NULL)
			goto discard;
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (flags & MSG_DONTWAIT))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = sosleep_nsec(so, &so->so_timeo,
				    PSOCK | PCATCH, "netcls",
				    solinger_nsec(so));
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2;
		error2 = pru_detach(so);
		if (error == 0)
			error = error2;
	}
	if (so->so_options & SO_ACCEPTCONN) {
		int persocket = solock_persocket(so);

		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			if (persocket)
				solock(so2);
			(void) soqremque(so2, 0);
			if (persocket)
				sounlock(so);
			soabort(so2);
			if (persocket)
				solock(so);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			if (persocket)
				solock(so2);
			(void) soqremque(so2, 1);
			if (persocket)
				sounlock(so);
			soabort(so2);
			if (persocket)
				solock(so);
		}
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state |= SS_NOFDREF;

#ifdef SOCKET_SPLICE
	if (so->so_sp) {
		struct socket *soback;

		if (so->so_proto->pr_flags & PR_WANTRCVD) {
			/*
			 * Copy-paste, but we can't relock and sleep in
			 * sofree() in the tcp(4) case.  That's why tcp(4)
			 * still relies on solock() for splicing and
			 * unsplicing.
			 */

			if (issplicedback(so)) {
				int freeing = SOSP_FREEING_WRITE;

				if (so->so_sp->ssp_soback == so)
					freeing |= SOSP_FREEING_READ;
				sounsplice(so->so_sp->ssp_soback, so, freeing);
			}
			if (isspliced(so)) {
				int freeing = SOSP_FREEING_READ;

				if (so == so->so_sp->ssp_socket)
					freeing |= SOSP_FREEING_WRITE;
				sounsplice(so, so->so_sp->ssp_socket, freeing);
			}
			goto free;
		}

		sounlock(so);
		mtx_enter(&so->so_snd.sb_mtx);
		/*
		 * Concurrent sounsplice() locks `sb_mtx' mutexes on
		 * both `so_snd' and `so_rcv' before unsplicing sockets.
		 */
		if ((soback = so->so_sp->ssp_soback) == NULL) {
			mtx_leave(&so->so_snd.sb_mtx);
			goto notsplicedback;
		}
		soref(soback);
		mtx_leave(&so->so_snd.sb_mtx);

		/*
		 * `so' can only be unspliced, and never spliced again.
		 * Thus if the issplicedback(so) check is positive, the
		 * socket is still spliced and `ssp_soback' points to the
		 * same socket as `soback'.
		 */
		sblock(&soback->so_rcv, SBL_WAIT | SBL_NOINTR);
		if (issplicedback(so)) {
			int freeing = SOSP_FREEING_WRITE;

			if (so->so_sp->ssp_soback == so)
				freeing |= SOSP_FREEING_READ;
			solock(soback);
			sounsplice(so->so_sp->ssp_soback, so, freeing);
			sounlock(soback);
		}
		sbunlock(&soback->so_rcv);
		sorele(soback);

notsplicedback:
		sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
		if (isspliced(so)) {
			int freeing = SOSP_FREEING_READ;

			if (so == so->so_sp->ssp_socket)
				freeing |= SOSP_FREEING_WRITE;
			solock(so);
			sounsplice(so, so->so_sp->ssp_socket, freeing);
			sounlock(so);
		}
		sbunlock(&so->so_rcv);

		solock(so);
	}
free:
#endif /* SOCKET_SPLICE */
	/* sofree() calls sounlock(). */
	sofree(so, 0);
	return (error);
}
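
/*
 * Example (illustrative; `s' is a hypothetical descriptor) of the
 * SO_LINGER interaction above:
 *
 *	struct linger l = { .l_onoff = 1, .l_linger = 5 };
 *	setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
 *	close(s);
 *
 * makes soclose() sleep in sosleep_nsec() ("netcls") for up to 5
 * seconds while the disconnect completes; solinger_nsec() maps
 * l_linger == 0 to INFSLP, i.e. a sleep with no timeout.
 */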

void
soabort(struct socket *so)
{
	soassertlocked(so);
	pru_abort(so);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int error = 0;

	soassertlocked(so);

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = pru_accept(so, nam);
	else
		error = ECONNABORTED;
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	int error;

	soassertlocked(so);

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = pru_connect(so, nam);
	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int persocket, error;

	if ((persocket = solock_persocket(so1)))
		solock_pair(so1, so2);
	else
		solock(so1);

	error = pru_connect2(so1, so2);

	if (persocket)
		sounlock(so2);
	sounlock(so1);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int error;

	soassertlocked(so);

	if ((so->so_state & SS_ISCONNECTED) == 0)
		return (ENOTCONN);
	if (so->so_state & SS_ISDISCONNECTING)
		return (EALREADY);
	error = pru_disconnect(so);
	return (error);
}

int m_getuio(struct mbuf **, int, long, struct uio *);

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
	long space, clen = 0;
	size_t resid;
	int error;
	int atomic = sosendallatonce(so) || top;
	int dosolock = ((so->so_snd.sb_flags & SB_MTXLOCK) == 0);

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/* MSG_EOR on a SOCK_STREAM socket is invalid. */
	if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		m_freem(top);
		m_freem(control);
		return (EINVAL);
	}
	if (uio && uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgsnd++;
	if (control) {
		/*
		 * In theory clen should be unsigned (since control->m_len is).
		 * However, space must be signed, as it might be less than 0
		 * if we over-committed, and we must use a signed comparison
		 * of space and clen.
		 */
		clen = control->m_len;
		/* reserve extra space for AF_UNIX's internalize */
		if (so->so_proto->pr_domain->dom_family == AF_UNIX &&
		    clen >= CMSG_ALIGN(sizeof(struct cmsghdr)) &&
		    mtod(control, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
			clen = CMSG_SPACE(
			    (clen - CMSG_ALIGN(sizeof(struct cmsghdr))) *
			    (sizeof(struct fdpass) / sizeof(int)));
	}

#define	snderr(errno)	{ error = errno; goto release; }

restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	if (dosolock)
		solock_shared(so);
	sb_mtx_lock(&so->so_snd);
	so->so_snd.sb_state |= SS_ISSENDING;
	do {
		if (so->so_snd.sb_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if ((error = READ_ONCE(so->so_error))) {
			so->so_error = 0;
			snderr(error);
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if (!(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == NULL)
				snderr(EDESTADDRREQ);
		}
		space = sbspace_locked(so, &so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if (so->so_proto->pr_domain->dom_family == AF_UNIX) {
			if (atomic && resid > so->so_snd.sb_hiwat)
				snderr(EMSGSIZE);
		} else {
			if (clen > so->so_snd.sb_hiwat ||
			    (atomic && resid > so->so_snd.sb_hiwat - clen))
				snderr(EMSGSIZE);
		}
		if (space < clen ||
		    (space - clen < resid &&
		    (atomic || space < so->so_snd.sb_lowat))) {
			if (flags & MSG_DONTWAIT)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(so, &so->so_snd);
			so->so_snd.sb_state &= ~SS_ISSENDING;
			sb_mtx_unlock(&so->so_snd);
			if (dosolock)
				sounlock_shared(so);
			if (error)
				goto out;
			goto restart;
		}
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				sb_mtx_unlock(&so->so_snd);
				if (dosolock)
					sounlock_shared(so);
				error = m_getuio(&top, atomic, space, uio);
				if (dosolock)
					solock_shared(so);
				sb_mtx_lock(&so->so_snd);
				if (error)
					goto release;
				space -= top->m_pkthdr.len;
				resid = uio->uio_resid;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			}
			if (resid == 0)
				so->so_snd.sb_state &= ~SS_ISSENDING;
			if (top && so->so_options & SO_ZEROIZE)
				top->m_flags |= M_ZEROIZE;
			sb_mtx_unlock(&so->so_snd);
			if (!dosolock)
				solock_shared(so);
			if (flags & MSG_OOB)
				error = pru_sendoob(so, top, addr, control);
			else
				error = pru_send(so, top, addr, control);
			if (!dosolock)
				sounlock_shared(so);
			sb_mtx_lock(&so->so_snd);
			clen = 0;
			control = NULL;
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	so->so_snd.sb_state &= ~SS_ISSENDING;
	sb_mtx_unlock(&so->so_snd);
	if (dosolock)
		sounlock_shared(so);
	sbunlock(&so->so_snd);
out:
	m_freem(top);
	m_freem(control);
	return (error);
}
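
/*
 * Example (illustrative): a sendto(2) of a 64k datagram arrives here
 * with atomic set by sosendallatonce(), so a payload larger than the
 * send buffer high-water mark fails immediately with EMSGSIZE.  On a
 * stream socket the same amount is instead sent piecemeal, sleeping in
 * sbwait() whenever the buffer fills up.
 */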

int
m_getuio(struct mbuf **mp, int atomic, long space, struct uio *uio)
{
	struct mbuf *m, *top = NULL;
	struct mbuf **nextp = &top;
	u_long len, mlen;
	size_t resid = uio->uio_resid;
	int error;

	do {
		if (top == NULL) {
			MGETHDR(m, M_WAIT, MT_DATA);
			mlen = MHLEN;
		} else {
			MGET(m, M_WAIT, MT_DATA);
			mlen = MLEN;
		}
		/* chain mbuf together */
		*nextp = m;
		nextp = &m->m_next;

		resid = ulmin(resid, space);
		if (resid >= MINCLSIZE) {
			MCLGETL(m, M_NOWAIT, ulmin(resid, MAXMCLBYTES));
			if ((m->m_flags & M_EXT) == 0)
				MCLGETL(m, M_NOWAIT, MCLBYTES);
			if ((m->m_flags & M_EXT) == 0)
				goto nopages;
			mlen = m->m_ext.ext_size;
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && m == top && len < mlen - max_hdr)
				m->m_data += max_hdr;
		} else {
nopages:
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && m == top && len < mlen - max_hdr)
				m_align(m, len);
		}

		error = uiomove(mtod(m, caddr_t), len, uio);
		if (error) {
			m_freem(top);
			return (error);
		}

		/* adjust counters */
		resid = uio->uio_resid;
		space -= len;
		m->m_len = len;
		top->m_pkthdr.len += len;

		/* Is there more space and more data? */
	} while (space > 0 && resid > 0);

	*mp = top;
	return 0;
}
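
/*
 * Note on the headroom logic in m_getuio() above (descriptive only):
 * for an atomic send whose data does not fill the first mbuf, the data
 * is offset by max_hdr (or aligned with m_align()), so that the
 * protocol and link headers prepended later fit without allocating
 * another mbuf.
 */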

/*
 * Following replacement or removal of the first mbuf on the first
 * mbuf chain of a socket buffer, push necessary state changes back
 * into the socket buffer so that other consumers see the values
 * consistently.  'nextrecord' is the caller's locally stored value of
 * the original value of sb->sb_mb->m_nextpkt which must be restored
 * when the lead mbuf changes.  NOTE: 'nextrecord' may be NULL.
 */
void
sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
{

	/*
	 * First, update for the new value of nextrecord.  If necessary,
	 * make it the first record.
	 */
	if (sb->sb_mb != NULL)
		sb->sb_mb->m_nextpkt = nextrecord;
	else
		sb->sb_mb = nextrecord;

	/*
	 * Now update any dependent socket buffer fields to reflect
	 * the new state.  This is an inline of SB_EMPTY_FIXUP, with
	 * the addition of a second clause that takes care of the
	 * case where sb_mb has been updated, but remains the last
	 * record.
	 */
	if (sb->sb_mb == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (sb->sb_mb->m_nextpkt == NULL)
		sb->sb_lastrecord = sb->sb_mb;
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking the network stack for the entire time here,
 * we release the solock() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp,
    socklen_t controllen)
{
	struct mbuf *m, **mp;
	struct mbuf *cm;
	u_long len, offset, moff;
	int flags, error, error2, type, uio_error = 0;
	const struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	size_t resid, orig_resid = uio->uio_resid;
	int dosolock = ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0);

	mp = mp0;
	if (paddr)
		*paddr = NULL;
	if (controlp)
		*controlp = NULL;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		solock(so);
		error = pru_rcvoob(so, m, flags & MSG_PEEK);
		sounlock(so);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    ulmin(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		m_freem(m);
		return (error);
	}
	if (mp)
		*mp = NULL;

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	if (dosolock)
		solock_shared(so);
	sb_mtx_lock(&so->so_rcv);

	m = so->so_rcv.sb_mb;
#ifdef SOCKET_SPLICE
	if (isspliced(so))
		m = NULL;
#endif /* SOCKET_SPLICE */
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == NULL && so->so_rcv.sb_cc)
#ifdef SOCKET_SPLICE
			if (!isspliced(so))
#endif /* SOCKET_SPLICE */
				panic("receive 1: so %p, so_type %d, sb_cc %lu",
				    so, so->so_type, so->so_rcv.sb_cc);
#endif
		if ((error2 = READ_ONCE(so->so_error))) {
			if (m)
				goto dontblock;
			error = error2;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else if (so->so_rcv.sb_cc == 0)
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if (flags & MSG_DONTWAIT) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");

		sbunlock(&so->so_rcv);
		error = sbwait(so, &so->so_rcv);
		sb_mtx_unlock(&so->so_rcv);
		if (dosolock)
			sounlock_shared(so);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * From this point onward, we maintain 'nextrecord' as a cache of the
	 * pointer to the next record in the socket buffer.  We must keep the
	 * various socket buffer pointers and local stack versions of the
	 * pointers in sync, pushing out modifications before operations that
	 * may sleep, and re-reading them afterwards.
	 *
	 * Otherwise, we will race with the network stack appending new data
	 * or records onto the socket buffer by using inconsistent/stale
	 * versions of the field, possibly resulting in socket buffer
	 * corruption.
	 */
	if (uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgrcv++;
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(so, &so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = NULL;
				m = so->so_rcv.sb_mb;
			} else {
				so->so_rcv.sb_mb = m_free(m);
				m = so->so_rcv.sb_mb;
			}
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		int skip = 0;
		if (flags & MSG_PEEK) {
			if (mtod(m, struct cmsghdr *)->cmsg_type ==
			    SCM_RIGHTS) {
				/* don't leak internalized SCM_RIGHTS msgs */
				skip = 1;
			} else if (controlp)
				*controlp = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(so, &so->so_rcv, m);
			so->so_rcv.sb_mb = m->m_next;
			m->m_nextpkt = m->m_next = NULL;
			cm = m;
			m = so->so_rcv.sb_mb;
			sbsync(&so->so_rcv, nextrecord);
			if (controlp) {
				if (pr->pr_domain->dom_externalize) {
					sb_mtx_unlock(&so->so_rcv);
					if (dosolock)
						sounlock_shared(so);
					error =
					    (*pr->pr_domain->dom_externalize)
					    (cm, controllen, flags);
					if (dosolock)
						solock_shared(so);
					sb_mtx_lock(&so->so_rcv);
				}
				*controlp = cm;
			} else {
				/*
				 * Dispose of any SCM_RIGHTS message that went
				 * through the read path rather than recv.
				 */
				if (pr->pr_domain->dom_dispose) {
					sb_mtx_unlock(&so->so_rcv);
					pr->pr_domain->dom_dispose(cm);
					sb_mtx_lock(&so->so_rcv);
				}
				m_free(cm);
			}
		}
		if (m != NULL)
			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
		else
			nextrecord = so->so_rcv.sb_mb;
		if (controlp && !skip)
			controlp = &(*controlp)->m_next;
		orig_resid = 0;
	}

	/* If m is non-NULL, we have some data to read. */
	if (m) {
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA) {
			break;
		} else if (m->m_type == MT_CONTROL) {
			/*
			 * If there is more than one control message in the
			 * stream, we do a short read.  The next one can be
			 * received or disposed of by another system call.
			 */
			break;
#ifdef DIAGNOSTIC
		} else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) {
			panic("receive 3: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		}
		so->so_rcv.sb_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == NULL && uio_error == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			resid = uio->uio_resid;
			sb_mtx_unlock(&so->so_rcv);
			if (dosolock)
				sounlock_shared(so);
			uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
			if (dosolock)
				solock_shared(so);
			sb_mtx_lock(&so->so_rcv);
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
				orig_resid = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(so, &so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = NULL;
				} else {
					so->so_rcv.sb_mb = m_free(m);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK) {
				moff += len;
				orig_resid = 0;
			} else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
				so->so_rcv.sb_datacc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_rcv.sb_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_rcv.sb_state & SS_CANTRCVMORE ||
			    so->so_error)
				break;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			if (sbwait(so, &so->so_rcv)) {
				sb_mtx_unlock(&so->so_rcv);
				if (dosolock)
					sounlock_shared(so);
				sbunlock(&so->so_rcv);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(so, &so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD) {
			sb_mtx_unlock(&so->so_rcv);
			if (!dosolock)
				solock_shared(so);
			pru_rcvd(so);
			if (!dosolock)
				sounlock_shared(so);
			sb_mtx_lock(&so->so_rcv);
		}
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 &&
	    (so->so_rcv.sb_state & SS_CANTRCVMORE) == 0) {
		sb_mtx_unlock(&so->so_rcv);
		if (dosolock)
			sounlock_shared(so);
		sbunlock(&so->so_rcv);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sb_mtx_unlock(&so->so_rcv);
	if (dosolock)
		sounlock_shared(so);
	sbunlock(&so->so_rcv);
	return (error);
}

int
soshutdown(struct socket *so, int how)
{
	int error = 0;

	switch (how) {
	case SHUT_RD:
		sorflush(so);
		break;
	case SHUT_RDWR:
		sorflush(so);
		/* FALLTHROUGH */
	case SHUT_WR:
		solock(so);
		error = pru_shutdown(so);
		sounlock(so);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct mbuf *m;
	const struct protosw *pr = so->so_proto;
	int error;

	error = sblock(sb, SBL_WAIT | SBL_NOINTR);
	/* with SBL_WAIT and SBL_NOINTR sblock() must not fail */
	KASSERT(error == 0);

	solock_shared(so);
	socantrcvmore(so);
	mtx_enter(&sb->sb_mtx);
	m = sb->sb_mb;
	memset(&sb->sb_startzero, 0,
	    (caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero);
	sb->sb_timeo_nsecs = INFSLP;
	mtx_leave(&sb->sb_mtx);
	sounlock_shared(so);
	sbunlock(sb);

	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(m);
	m_purge(m);
}
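
/*
 * Example (illustrative; `source_fd' and `drain_fd' are hypothetical
 * descriptors) of the userland interface to the splicing code below:
 *
 *	struct splice sp = { .sp_fd = drain_fd, .sp_max = 0 };
 *	timerclear(&sp.sp_idle);
 *	setsockopt(source_fd, SOL_SOCKET, SO_SPLICE, &sp, sizeof(sp));
 *
 * After this, somove() shovels data arriving on source_fd directly into
 * drain_fd in the kernel; splicing dissolves on EOF, error, reaching
 * sp_max, or an explicit unsplice with a file descriptor of -1.
 */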

#ifdef SOCKET_SPLICE

#define so_splicelen	so_sp->ssp_len
#define so_splicemax	so_sp->ssp_max
#define so_idletv	so_sp->ssp_idletv
#define so_idleto	so_sp->ssp_idleto
#define so_splicetask	so_sp->ssp_task

int
sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
{
	struct file *fp;
	struct socket *sosp;
	struct taskq *tq;
	int error = 0;

	if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
		return (EPROTONOSUPPORT);
	if (max && max < 0)
		return (EINVAL);
	if (tv && (tv->tv_sec < 0 || !timerisvalid(tv)))
		return (EINVAL);

	/* If no fd is given, unsplice by removing existing link. */
	if (fd < 0) {
		if ((error = sblock(&so->so_rcv, SBL_WAIT)) != 0)
			return (error);
		solock(so);
		if (so->so_options & SO_ACCEPTCONN) {
			error = EOPNOTSUPP;
			goto out;
		}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto out;
		}

		if (so->so_sp && so->so_sp->ssp_socket)
			sounsplice(so, so->so_sp->ssp_socket, 0);
 out:
		sounlock(so);
		sbunlock(&so->so_rcv);
		return (error);
	}

	if (sosplice_taskq == NULL) {
		rw_enter_write(&sosplice_lock);
		if (sosplice_taskq == NULL) {
			tq = taskq_create("sosplice", 1, IPL_SOFTNET,
			    TASKQ_MPSAFE);
			if (tq == NULL) {
				rw_exit_write(&sosplice_lock);
				return (ENOMEM);
			}
			/* Ensure the taskq is fully visible to other CPUs. */
			membar_producer();
			sosplice_taskq = tq;
		}
		rw_exit_write(&sosplice_lock);
	} else {
		/* Ensure the taskq is fully visible on this CPU. */
		membar_consumer();
	}

	/* Find sosp, the drain socket where data will be spliced into. */
	if ((error = getsock(curproc, fd, &fp)) != 0)
		return (error);
	sosp = fp->f_data;

	if (sosp->so_proto->pr_usrreqs->pru_send !=
	    so->so_proto->pr_usrreqs->pru_send) {
		error = EPROTONOSUPPORT;
		goto frele;
	}

	if ((error = sblock(&so->so_rcv, SBL_WAIT)) != 0)
		goto frele;
	if ((error = sblock(&sosp->so_snd, SBL_WAIT)) != 0) {
		sbunlock(&so->so_rcv);
		goto frele;
	}
	solock(so);

	if ((so->so_options & SO_ACCEPTCONN) ||
	    (sosp->so_options & SO_ACCEPTCONN)) {
		error = EOPNOTSUPP;
		goto release;
	}
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
		error = ENOTCONN;
		goto release;
	}
	if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) {
		error = ENOTCONN;
		goto release;
	}
	if (so->so_sp == NULL)
		so->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
	if (sosp->so_sp == NULL)
		sosp->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
	if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
		error = EBUSY;
		goto release;
	}

	so->so_splicelen = 0;
	so->so_splicemax = max;
	if (tv)
		so->so_idletv = *tv;
	else
		timerclear(&so->so_idletv);
	timeout_set_flags(&so->so_idleto, soidle, so,
	    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
	task_set(&so->so_splicetask, sotask, so);

	/*
	 * To prevent sorwakeup() calling somove() before this somove()
	 * has finished, the socket buffers are not marked as spliced yet.
	 */

	/* Splice so and sosp together. */
	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);
	so->so_sp->ssp_socket = sosp;
	sosp->so_sp->ssp_soback = so;
	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0)
		sounlock(so);
	if (somove(so, M_WAIT)) {
		mtx_enter(&so->so_rcv.sb_mtx);
		mtx_enter(&sosp->so_snd.sb_mtx);
		so->so_rcv.sb_flags |= SB_SPLICE;
		sosp->so_snd.sb_flags |= SB_SPLICE;
		mtx_leave(&sosp->so_snd.sb_mtx);
		mtx_leave(&so->so_rcv.sb_mtx);
	}
	if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0)
		solock(so);

 release:
	sounlock(so);
	sbunlock(&sosp->so_snd);
	sbunlock(&so->so_rcv);
 frele:
	FRELE(fp, curproc);

	return (error);
}

void
sounsplice(struct socket *so, struct socket *sosp, int freeing)
{
	if ((so->so_proto->pr_flags & PR_WANTRCVD) == 0)
		sbassertlocked(&so->so_rcv);
	soassertlocked(so);

	task_del(sosplice_taskq, &so->so_splicetask);
	timeout_del(&so->so_idleto);

	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);
	so->so_rcv.sb_flags &= ~SB_SPLICE;
	sosp->so_snd.sb_flags &= ~SB_SPLICE;
	so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	/* Do not wakeup a socket that is about to be freed. */
	if ((freeing & SOSP_FREEING_READ) == 0 && soreadable(so))
		sorwakeup(so);
	if ((freeing & SOSP_FREEING_WRITE) == 0 && sowriteable(sosp))
		sowwakeup(sosp);
}

void
soidle(void *arg)
{
	struct socket *so = arg;

	sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
	solock(so);
	/*
	 * Depending on socket type, sblock(&so->so_rcv) or solock()
	 * is always held while modifying SB_SPLICE and
	 * so->so_sp->ssp_socket.
	 */
	if (so->so_rcv.sb_flags & SB_SPLICE) {
		so->so_error = ETIMEDOUT;
		sounsplice(so, so->so_sp->ssp_socket, 0);
	}
	sounlock(so);
	sbunlock(&so->so_rcv);
}

void
sotask(void *arg)
{
	struct socket *so = arg;
	int doyield = 0;
	int sockstream = (so->so_proto->pr_flags & PR_WANTRCVD);

	/*
	 * sblock() on `so_rcv' protects sockets from being unspliced
	 * for UDP case. TCP sockets still rely on solock().
	 */

	sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
	if (sockstream)
		solock(so);

	if (so->so_rcv.sb_flags & SB_SPLICE) {
		if (sockstream)
			doyield = 1;
		somove(so, M_DONTWAIT);
	}

	if (sockstream)
		sounlock(so);
	sbunlock(&so->so_rcv);

	if (doyield) {
		/* Avoid user land starvation. */
		yield();
	}
}

/*
 * The socket splicing task or idle timeout may sleep while grabbing the net
 * lock.  As sofree() can be called anytime, sotask() or soidle() could access
 * the socket memory of a freed socket after wakeup.  So delay the pool_put()
 * after all pending socket splicing tasks or timeouts have finished.  Do this
 * by scheduling it on the same threads.
 */
void
soreaper(void *arg)
{
	struct socket *so = arg;

	/* Reuse splice task, sounsplice() has been called before. */
	task_set(&so->so_sp->ssp_task, soput, so);
	task_add(sosplice_taskq, &so->so_sp->ssp_task);
}

void
soput(void *arg)
{
	struct socket *so = arg;

	pool_put(&sosplice_pool, so->so_sp);
	pool_put(&socket_pool, so);
}

/*
 * Move data from receive buffer of spliced source socket to send
 * buffer of drain socket.  Try to move as much as possible in one
 * big chunk.  It is a TCP only implementation.
 * Return value 0 means splicing has been finished, 1 continue.
 */
int
somove(struct socket *so, int wait)
{
	struct socket *sosp = so->so_sp->ssp_socket;
	struct mbuf *m, **mp, *nextrecord;
	u_long len, off, oobmark;
	long space;
	int error = 0, maxreached = 0, unsplice = 0;
	unsigned int rcvstate;
	int sockdgram = ((so->so_proto->pr_flags &
	    PR_WANTRCVD) == 0);

	if (sockdgram)
		sbassertlocked(&so->so_rcv);
	else
		soassertlocked(so);

	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);

 nextpkt:
	if ((error = READ_ONCE(so->so_error)))
		goto release;
	if (sosp->so_snd.sb_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto release;
	}

	error = READ_ONCE(sosp->so_error);
	if (error) {
		if (error != ETIMEDOUT && error != EFBIG && error != ELOOP)
			goto release;
		error = 0;
	}
	if ((sosp->so_state & SS_ISCONNECTED) == 0)
		goto release;

	/* Calculate how many bytes can be copied now. */
	len = so->so_rcv.sb_datacc;
	if (so->so_splicemax) {
		KASSERT(so->so_splicelen < so->so_splicemax);
		if (so->so_splicemax <= so->so_splicelen + len) {
			len = so->so_splicemax - so->so_splicelen;
			maxreached = 1;
		}
	}
	space = sbspace_locked(sosp, &sosp->so_snd);
	if (so->so_oobmark && so->so_oobmark < len &&
	    so->so_oobmark < space + 1024)
		space += 1024;
	if (space <= 0) {
		maxreached = 0;
		goto release;
	}
	if (space < len) {
		maxreached = 0;
		if (space < sosp->so_snd.sb_lowat)
			goto release;
		len = space;
	}
	sosp->so_snd.sb_state |= SS_ISSENDING;

	SBLASTRECORDCHK(&so->so_rcv, "somove 1");
	SBLASTMBUFCHK(&so->so_rcv, "somove 1");
	m = so->so_rcv.sb_mb;
	if (m == NULL)
		goto release;
	nextrecord = m->m_nextpkt;

	/* Drop address and control information not used with splicing. */
	if (so->so_proto->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("somove soname: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
#endif
		m = m->m_next;
	}
	while (m && m->m_type == MT_CONTROL)
		m = m->m_next;
	if (m == NULL) {
		sbdroprecord(so, &so->so_rcv);
		if (so->so_proto->pr_flags & PR_WANTRCVD) {
			mtx_leave(&sosp->so_snd.sb_mtx);
			mtx_leave(&so->so_rcv.sb_mtx);
			pru_rcvd(so);
			mtx_enter(&so->so_rcv.sb_mtx);
			mtx_enter(&sosp->so_snd.sb_mtx);
		}
		goto nextpkt;
	}

	/*
	 * By splicing sockets connected to localhost, userland might create a
	 * loop.  Dissolve splicing with error if loop is detected by counter.
	 *
	 * If we deal with a looped broadcast/multicast packet we bail out
	 * with no error to suppress splice termination.
	 */
	if ((m->m_flags & M_PKTHDR) &&
	    ((m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) ||
	    ((m->m_flags & M_LOOP) && (m->m_flags & (M_BCAST|M_MCAST))))) {
		error = ELOOP;
		goto release;
	}

	if (so->so_proto->pr_flags & PR_ATOMIC) {
		if ((m->m_flags & M_PKTHDR) == 0)
			panic("somove !PKTHDR: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
		if (sosp->so_snd.sb_hiwat < m->m_pkthdr.len) {
			error = EMSGSIZE;
			goto release;
		}
		if (len < m->m_pkthdr.len)
			goto release;
		if (m->m_pkthdr.len < len) {
			maxreached = 0;
			len = m->m_pkthdr.len;
		}
		/*
		 * Throw away the name mbuf after it has been assured
		 * that the whole first record can be processed.
		 */
		m = so->so_rcv.sb_mb;
		sbfree(so, &so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		sbsync(&so->so_rcv, nextrecord);
	}
	/*
	 * Throw away the control mbufs after it has been assured
	 * that the whole first record can be processed.
	 */
	m = so->so_rcv.sb_mb;
	while (m && m->m_type == MT_CONTROL) {
		sbfree(so, &so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		m = so->so_rcv.sb_mb;
		sbsync(&so->so_rcv, nextrecord);
	}

	SBLASTRECORDCHK(&so->so_rcv, "somove 2");
	SBLASTMBUFCHK(&so->so_rcv, "somove 2");

	/* Take at most len mbufs out of receive buffer. */
	for (off = 0, mp = &m; off <= len && *mp;
	    off += (*mp)->m_len, mp = &(*mp)->m_next) {
		u_long size = len - off;

#ifdef DIAGNOSTIC
		if ((*mp)->m_type != MT_DATA && (*mp)->m_type != MT_HEADER)
			panic("somove type: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, *mp, (*mp)->m_type);
#endif
		if ((*mp)->m_len > size) {
			/*
			 * Move only a partial mbuf at maximum splice length or
			 * if the drain buffer is too small for this large mbuf.
			 */
			if (!maxreached && sosp->so_snd.sb_datacc > 0) {
				len -= size;
				break;
			}
			*mp = m_copym(so->so_rcv.sb_mb, 0, size, wait);
			if (*mp == NULL) {
				len -= size;
				break;
			}
			so->so_rcv.sb_mb->m_data += size;
			so->so_rcv.sb_mb->m_len -= size;
			so->so_rcv.sb_cc -= size;
			so->so_rcv.sb_datacc -= size;
		} else {
			*mp = so->so_rcv.sb_mb;
			sbfree(so, &so->so_rcv, *mp);
			so->so_rcv.sb_mb = (*mp)->m_next;
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	*mp = NULL;

	SBLASTRECORDCHK(&so->so_rcv, "somove 3");
	SBLASTMBUFCHK(&so->so_rcv, "somove 3");
	SBCHECK(so, &so->so_rcv);
	if (m == NULL)
		goto release;
	m->m_nextpkt = NULL;
	if (m->m_flags & M_PKTHDR) {
		m_resethdr(m);
		m->m_pkthdr.len = len;
	}

	/* Send window update to source peer as receive buffer has changed. */
	if (so->so_proto->pr_flags & PR_WANTRCVD) {
		mtx_leave(&sosp->so_snd.sb_mtx);
		mtx_leave(&so->so_rcv.sb_mtx);
		pru_rcvd(so);
		mtx_enter(&so->so_rcv.sb_mtx);
		mtx_enter(&sosp->so_snd.sb_mtx);
	}

	/* Receive buffer did shrink by len bytes, adjust oob. */
	rcvstate = so->so_rcv.sb_state;
	so->so_rcv.sb_state &= ~SS_RCVATMARK;
	oobmark = so->so_oobmark;
	so->so_oobmark = oobmark > len ? oobmark - len : 0;
	if (oobmark) {
		if (oobmark == len)
			so->so_rcv.sb_state |= SS_RCVATMARK;
		if (oobmark >= len)
			oobmark = 0;
	}

	/*
	 * Handle oob data.  If any malloc fails, ignore error.
	 * TCP urgent data is not very reliable anyway.
	 */
	while (((rcvstate & SS_RCVATMARK) || oobmark) &&
	    (so->so_options & SO_OOBINLINE)) {
		struct mbuf *o = NULL;

		if (rcvstate & SS_RCVATMARK) {
			o = m_get(wait, MT_DATA);
			rcvstate &= ~SS_RCVATMARK;
		} else if (oobmark) {
			o = m_split(m, oobmark, wait);
			if (o) {
				mtx_leave(&sosp->so_snd.sb_mtx);
				mtx_leave(&so->so_rcv.sb_mtx);
				error = pru_send(sosp, m, NULL, NULL);
				mtx_enter(&so->so_rcv.sb_mtx);
				mtx_enter(&sosp->so_snd.sb_mtx);

				if (error) {
					if (sosp->so_snd.sb_state &
					    SS_CANTSENDMORE)
						error = EPIPE;
					m_freem(o);
					goto release;
				}
				len -= oobmark;
				so->so_splicelen += oobmark;
				m = o;
				o = m_get(wait, MT_DATA);
			}
			oobmark = 0;
		}
		if (o) {
			o->m_len = 1;
			*mtod(o, caddr_t) = *mtod(m, caddr_t);

			mtx_leave(&sosp->so_snd.sb_mtx);
			mtx_leave(&so->so_rcv.sb_mtx);
			error = pru_sendoob(sosp, o, NULL, NULL);
			mtx_enter(&so->so_rcv.sb_mtx);
			mtx_enter(&sosp->so_snd.sb_mtx);

			if (error) {
				if (sosp->so_snd.sb_state & SS_CANTSENDMORE)
					error = EPIPE;
				m_freem(m);
				goto release;
			}
			len -= 1;
			so->so_splicelen += 1;
			if (oobmark) {
				oobmark -= 1;
				if (oobmark == 0)
					rcvstate |= SS_RCVATMARK;
			}
			m_adj(m, 1);
		}
	}

	/* Append all remaining data to drain socket. */
	if (so->so_rcv.sb_cc == 0 || maxreached)
		sosp->so_snd.sb_state &= ~SS_ISSENDING;

	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	if (sockdgram)
		solock_shared(sosp);
	error = pru_send(sosp, m, NULL, NULL);
	if (sockdgram)
		sounlock_shared(sosp);

	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);

	if (error) {
		if (sosp->so_snd.sb_state & SS_CANTSENDMORE ||
		    sosp->so_pcb == NULL)
			error = EPIPE;
		goto release;
	}
	so->so_splicelen += len;

	/* Move several packets if possible. */
	if (!maxreached && nextrecord)
		goto nextpkt;

 release:
	sosp->so_snd.sb_state &= ~SS_ISSENDING;

	if (!error && maxreached && so->so_splicemax == so->so_splicelen)
		error = EFBIG;
	if (error)
		WRITE_ONCE(so->so_error, error);

	if (((so->so_rcv.sb_state & SS_CANTRCVMORE) &&
	    so->so_rcv.sb_cc == 0) ||
	    (sosp->so_snd.sb_state & SS_CANTSENDMORE) ||
	    maxreached || error)
		unsplice = 1;

	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	if (unsplice) {
		if (sockdgram)
			solock(so);
		sounsplice(so, sosp, 0);
		if (sockdgram)
			sounlock(so);

		return (0);
	}
	if (timerisset(&so->so_idletv))
		timeout_add_tv(&so->so_idleto, &so->so_idletv);
	return (1);
}
#endif /* SOCKET_SPLICE */

void
sorwakeup(struct socket *so)
{
	if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

#ifdef SOCKET_SPLICE
	if (so->so_proto->pr_flags & PR_SPLICE) {
		sb_mtx_lock(&so->so_rcv);
		if (so->so_rcv.sb_flags & SB_SPLICE)
			task_add(sosplice_taskq, &so->so_splicetask);
		if (isspliced(so)) {
			sb_mtx_unlock(&so->so_rcv);
			return;
		}
		sb_mtx_unlock(&so->so_rcv);
	}
#endif
	sowakeup(so, &so->so_rcv);
	if (so->so_upcall)
		(*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
}

void
sowwakeup(struct socket *so)
{
	if ((so->so_snd.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

#ifdef SOCKET_SPLICE
	if (so->so_proto->pr_flags & PR_SPLICE) {
		sb_mtx_lock(&so->so_snd);
		if (so->so_snd.sb_flags & SB_SPLICE)
			task_add(sosplice_taskq,
			    &so->so_sp->ssp_soback->so_splicetask);
		if (issplicedback(so)) {
			sb_mtx_unlock(&so->so_snd);
			return;
		}
		sb_mtx_unlock(&so->so_snd);
	}
#endif
	sowakeup(so, &so->so_snd);
}

int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
	int error = 0;

	if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			solock(so);
			error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
			    level, optname, m);
			sounlock(so);
			return (error);
		}
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX)
				return (EINVAL);

			solock(so);
			so->so_linger = mtod(m, struct linger *)->l_linger;
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			sounlock(so);

			break;
		case SO_BINDANY:
			if ((error = suser(curproc)) != 0)	/* XXX */
				return (error);
			/* FALLTHROUGH */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_ZEROIZE:
			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);

			solock(so);
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			sounlock(so);

			break;
		case SO_DONTROUTE:
			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);
			if (*mtod(m, int *))
				error = EOPNOTSUPP;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			struct sockbuf *sb = (optname == SO_SNDBUF ||
			    optname == SO_SNDLOWAT ?
			    &so->so_snd : &so->so_rcv);
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);
			cnt = *mtod(m, int *);
			if ((long)cnt <= 0)
				cnt = 1;

			if ((sb->sb_flags & SB_MTXLOCK) == 0)
				solock(so);
			mtx_enter(&sb->sb_mtx);

			switch (optname) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sb->sb_state &
				    (SS_CANTSENDMORE | SS_CANTRCVMORE)) {
					error = EINVAL;
					break;
				}
				if (sbcheckreserve(cnt, sb->sb_wat) ||
				    sbreserve(so, sb, cnt)) {
					error = ENOBUFS;
					break;
				}
				sb->sb_wat = cnt;
				break;
			case SO_SNDLOWAT:
			case SO_RCVLOWAT:
				sb->sb_lowat = (cnt > sb->sb_hiwat) ?
				    sb->sb_hiwat : cnt;
				break;
			}

			mtx_leave(&sb->sb_mtx);
			if ((sb->sb_flags & SB_MTXLOCK) == 0)
				sounlock(so);

			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct sockbuf *sb = (optname == SO_SNDTIMEO ?
			    &so->so_snd : &so->so_rcv);
			struct timeval tv;
			uint64_t nsecs;

			if (m == NULL || m->m_len < sizeof (tv))
				return (EINVAL);
			memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
			if (!timerisvalid(&tv))
				return (EINVAL);
			nsecs = TIMEVAL_TO_NSEC(&tv);
			if (nsecs == UINT64_MAX)
				return (EDOM);
			if (nsecs == 0)
				nsecs = INFSLP;

			mtx_enter(&sb->sb_mtx);
			sb->sb_timeo_nsecs = nsecs;
			mtx_leave(&sb->sb_mtx);
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				const struct domain *dom =
				    so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				solock(so);
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_SETOPT, so, level, optname, m);
				sounlock(so);
			} else
				error = ENOPROTOOPT;
			break;
#ifdef SOCKET_SPLICE
		case SO_SPLICE:
			if (m == NULL) {
				error = sosplice(so, -1, 0, NULL);
			} else if (m->m_len < sizeof(int)) {
				error = EINVAL;
			} else if (m->m_len < sizeof(struct splice)) {
				error = sosplice(so, *mtod(m, int *), 0, NULL);
			} else {
				error = sosplice(so,
				    mtod(m, struct splice *)->sp_fd,
				    mtod(m, struct splice *)->sp_max,
				    &mtod(m, struct splice *)->sp_idle);
			}
			break;
#endif /* SOCKET_SPLICE */

		default:
			error = ENOPROTOOPT;
			break;
		}
	}

	return (error);
}
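
/*
 * Example (illustrative; `s' is a hypothetical descriptor): a request
 * like
 *
 *	int sz = 128 * 1024;
 *	setsockopt(s, SOL_SOCKET, SO_RCVBUF, &sz, sizeof(sz));
 *
 * takes the SO_SNDBUF/SO_RCVBUF path in sosetopt() above; an over-large,
 * unprivileged reservation is rejected by sbcheckreserve() with ENOBUFS,
 * and on success the new watermark is remembered in sb_wat.
 */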
int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
	int error = 0;

	if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			solock(so);
			error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
			    level, optname, m);
			sounlock(so);
			return (error);
		}
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX)
				return (EINVAL);

			solock(so);
			so->so_linger = mtod(m, struct linger *)->l_linger;
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			sounlock(so);

			break;
		case SO_BINDANY:
			if ((error = suser(curproc)) != 0)	/* XXX */
				return (error);
			/* FALLTHROUGH */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_ZEROIZE:
			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);

			solock(so);
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			sounlock(so);

			break;
		case SO_DONTROUTE:
			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);
			if (*mtod(m, int *))
				error = EOPNOTSUPP;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			struct sockbuf *sb = (optname == SO_SNDBUF ||
			    optname == SO_SNDLOWAT ?
			    &so->so_snd : &so->so_rcv);
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);
			cnt = *mtod(m, int *);
			if ((long)cnt <= 0)
				cnt = 1;

			if ((sb->sb_flags & SB_MTXLOCK) == 0)
				solock(so);
			mtx_enter(&sb->sb_mtx);

			switch (optname) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sb->sb_state &
				    (SS_CANTSENDMORE | SS_CANTRCVMORE)) {
					error = EINVAL;
					break;
				}
				if (sbcheckreserve(cnt, sb->sb_wat) ||
				    sbreserve(so, sb, cnt)) {
					error = ENOBUFS;
					break;
				}
				sb->sb_wat = cnt;
				break;
			case SO_SNDLOWAT:
			case SO_RCVLOWAT:
				sb->sb_lowat = (cnt > sb->sb_hiwat) ?
				    sb->sb_hiwat : cnt;
				break;
			}

			mtx_leave(&sb->sb_mtx);
			if ((sb->sb_flags & SB_MTXLOCK) == 0)
				sounlock(so);

			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct sockbuf *sb = (optname == SO_SNDTIMEO ?
			    &so->so_snd : &so->so_rcv);
			struct timeval tv;
			uint64_t nsecs;

			if (m == NULL || m->m_len < sizeof (tv))
				return (EINVAL);
			memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
			if (!timerisvalid(&tv))
				return (EINVAL);
			nsecs = TIMEVAL_TO_NSEC(&tv);
			if (nsecs == UINT64_MAX)
				return (EDOM);
			if (nsecs == 0)
				nsecs = INFSLP;

			mtx_enter(&sb->sb_mtx);
			sb->sb_timeo_nsecs = nsecs;
			mtx_leave(&sb->sb_mtx);
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				const struct domain *dom =
				    so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				solock(so);
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_SETOPT, so, level, optname, m);
				sounlock(so);
			} else
				error = ENOPROTOOPT;
			break;
#ifdef SOCKET_SPLICE
		case SO_SPLICE:
			if (m == NULL) {
				error = sosplice(so, -1, 0, NULL);
			} else if (m->m_len < sizeof(int)) {
				error = EINVAL;
			} else if (m->m_len < sizeof(struct splice)) {
				error = sosplice(so, *mtod(m, int *), 0, NULL);
			} else {
				error = sosplice(so,
				    mtod(m, struct splice *)->sp_fd,
				    mtod(m, struct splice *)->sp_max,
				    &mtod(m, struct splice *)->sp_idle);
			}
			break;
#endif /* SOCKET_SPLICE */

		default:
			error = ENOPROTOOPT;
			break;
		}
	}

	return (error);
}
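/*
 * Illustrative userland sketch (not part of this file) of the
 * SO_RCVTIMEO handling in sosetopt() above: the timeval is validated,
 * converted to nanoseconds and stored in sb_timeo_nsecs, with a zero
 * timeout mapping to INFSLP, i.e. "block forever".  The function name
 * is an assumption made for the example.
 *
 *	#include <sys/socket.h>
 *	#include <sys/time.h>
 *
 *	int
 *	set_read_timeout(int s, time_t sec)
 *	{
 *		struct timeval tv = { .tv_sec = sec, .tv_usec = 0 };
 *
 *		// sec == 0 restores the default of blocking forever
 *		return setsockopt(s, SOL_SOCKET, SO_RCVTIMEO,
 *		    &tv, sizeof(tv));
 *	}
 */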
int
sogetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
	int error = 0;

	if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			m->m_len = 0;

			solock(so);
			error = (*so->so_proto->pr_ctloutput)(PRCO_GETOPT, so,
			    level, optname, m);
			sounlock(so);
			return (error);
		} else
			return (ENOPROTOOPT);
	} else {
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			solock_shared(so);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			sounlock_shared(so);
			break;

		case SO_BINDANY:
		case SO_USELOOPBACK:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_ACCEPTCONN:
		case SO_TIMESTAMP:
		case SO_ZEROIZE:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_DONTROUTE:
			*mtod(m, int *) = 0;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			solock(so);
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			sounlock(so);

			break;

		case SO_DOMAIN:
			*mtod(m, int *) = so->so_proto->pr_domain->dom_family;
			break;

		case SO_PROTOCOL:
			*mtod(m, int *) = so->so_proto->pr_protocol;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct sockbuf *sb = (optname == SO_SNDTIMEO ?
			    &so->so_snd : &so->so_rcv);
			struct timeval tv;
			uint64_t nsecs;

			mtx_enter(&sb->sb_mtx);
			nsecs = sb->sb_timeo_nsecs;
			mtx_leave(&sb->sb_mtx);

			m->m_len = sizeof(struct timeval);
			memset(&tv, 0, sizeof(tv));
			if (nsecs != INFSLP)
				NSEC_TO_TIMEVAL(nsecs, &tv);
			memcpy(mtod(m, struct timeval *), &tv, sizeof tv);
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				const struct domain *dom =
				    so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				solock(so);
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_GETOPT, so, level, optname, m);
				sounlock(so);
				if (error)
					return (error);
				break;
			}
			return (ENOPROTOOPT);

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
		    {
			off_t len;

			m->m_len = sizeof(off_t);
			solock_shared(so);
			len = so->so_sp ? so->so_sp->ssp_len : 0;
			sounlock_shared(so);
			memcpy(mtod(m, off_t *), &len, sizeof(off_t));
			break;
		    }
#endif /* SOCKET_SPLICE */

		case SO_PEERCRED:
			if (so->so_proto->pr_protocol == AF_UNIX) {
				struct unpcb *unp = sotounpcb(so);

				solock(so);
				if (unp->unp_flags & UNP_FEIDS) {
					m->m_len = sizeof(unp->unp_connid);
					memcpy(mtod(m, caddr_t),
					    &(unp->unp_connid), m->m_len);
					sounlock(so);
					break;
				}
				sounlock(so);

				return (ENOTCONN);
			}
			return (EOPNOTSUPP);

		default:
			return (ENOPROTOOPT);
		}
		return (0);
	}
}
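/*
 * Illustrative userland sketch (not part of this file): the SO_SPLICE
 * case of sogetopt() above reports the number of bytes spliced so far
 * (ssp_len) as an off_t, which can be polled to monitor a running
 * splice.  The function name is an assumption made for the example.
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *
 *	off_t
 *	spliced_bytes(int s)
 *	{
 *		off_t len;
 *		socklen_t optlen = sizeof(len);
 *
 *		if (getsockopt(s, SOL_SOCKET, SO_SPLICE,
 *		    &len, &optlen) == -1)
 *			return (-1);
 *		return (len);
 *	}
 */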
void
sohasoutofband(struct socket *so)
{
	pgsigio(&so->so_sigio, SIGURG, 0);
	knote(&so->so_rcv.sb_klist, 0);
}

void
sofilt_lock(struct socket *so, struct sockbuf *sb)
{
	switch (so->so_proto->pr_domain->dom_family) {
	case PF_INET:
	case PF_INET6:
		NET_LOCK_SHARED();
		break;
	default:
		rw_enter_write(&so->so_lock);
		break;
	}

	mtx_enter(&sb->sb_mtx);
}

void
sofilt_unlock(struct socket *so, struct sockbuf *sb)
{
	mtx_leave(&sb->sb_mtx);

	switch (so->so_proto->pr_domain->dom_family) {
	case PF_INET:
	case PF_INET6:
		NET_UNLOCK_SHARED();
		break;
	default:
		rw_exit_write(&so->so_lock);
		break;
	}
}

int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	struct sockbuf *sb;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	case EVFILT_EXCEPT:
		kn->kn_fop = &soexcept_filtops;
		sb = &so->so_rcv;
		break;
	default:
		return (EINVAL);
	}

	klist_insert(&sb->sb_klist, kn);

	return (0);
}

void
filt_sordetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	klist_remove(&so->so_rcv.sb_klist, kn);
}

int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv = 0;

	MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
	if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

	if (so->so_options & SO_ACCEPTCONN) {
		if (so->so_rcv.sb_flags & SB_MTXLOCK)
			soassertlocked_readonly(so);

		kn->kn_data = so->so_qlen;
		rv = (kn->kn_data != 0);

		if (kn->kn_flags & (__EV_POLL | __EV_SELECT)) {
			if (so->so_state & SS_ISDISCONNECTED) {
				kn->kn_flags |= __EV_HUP;
				rv = 1;
			} else {
				rv = soreadable(so);
			}
		}

		return rv;
	}

	kn->kn_data = so->so_rcv.sb_cc;
#ifdef SOCKET_SPLICE
	if (isspliced(so)) {
		rv = 0;
	} else
#endif /* SOCKET_SPLICE */
	if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL) {
			if (so->so_state & SS_ISDISCONNECTED)
				kn->kn_flags |= __EV_HUP;
		}
		kn->kn_fflags = so->so_error;
		rv = 1;
	} else if (so->so_error) {
		rv = 1;
	} else if (kn->kn_sfflags & NOTE_LOWAT) {
		rv = (kn->kn_data >= kn->kn_sdata);
	} else {
		rv = (kn->kn_data >= so->so_rcv.sb_lowat);
	}

	return rv;
}
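/*
 * Illustrative userland sketch (not part of this file): as
 * filt_soread() above shows, EVFILT_READ on a listening socket reports
 * the number of pending connections (so_qlen) in the data field, so
 * accept(2) can be called without blocking.  Names and error handling
 * are assumptions made for the example.
 *
 *	#include <sys/types.h>
 *	#include <sys/event.h>
 *
 *	int
 *	wait_for_connection(int kq, int lsock)
 *	{
 *		struct kevent kev;
 *
 *		EV_SET(&kev, lsock, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *		if (kevent(kq, &kev, 1, &kev, 1, NULL) != 1)
 *			return (-1);
 *		// kev.data now holds the listen queue length
 *		return ((int)kev.data);
 *	}
 */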
void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	klist_remove(&so->so_snd.sb_klist, kn);
}

int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx);
	if ((so->so_snd.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

	kn->kn_data = sbspace_locked(so, &so->so_snd);
	if (so->so_snd.sb_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL) {
			if (so->so_state & SS_ISDISCONNECTED)
				kn->kn_flags |= __EV_HUP;
		}
		kn->kn_fflags = so->so_error;
		rv = 1;
	} else if (so->so_error) {
		rv = 1;
	} else if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
		rv = 0;
	} else if (kn->kn_sfflags & NOTE_LOWAT) {
		rv = (kn->kn_data >= kn->kn_sdata);
	} else {
		rv = (kn->kn_data >= so->so_snd.sb_lowat);
	}

	return (rv);
}

int
filt_soexcept(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv = 0;

	MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
	if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

#ifdef SOCKET_SPLICE
	if (isspliced(so)) {
		rv = 0;
	} else
#endif /* SOCKET_SPLICE */
	if (kn->kn_sfflags & NOTE_OOB) {
		if (so->so_oobmark || (so->so_rcv.sb_state & SS_RCVATMARK)) {
			kn->kn_fflags |= NOTE_OOB;
			kn->kn_data -= so->so_oobmark;
			rv = 1;
		}
	}

	if (kn->kn_flags & __EV_POLL) {
		if (so->so_state & SS_ISDISCONNECTED) {
			kn->kn_flags |= __EV_HUP;
			rv = 1;
		}
	}

	return rv;
}

int
filt_sowmodify(struct kevent *kev, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	sofilt_lock(so, &so->so_snd);
	rv = knote_modify(kev, kn);
	sofilt_unlock(so, &so->so_snd);

	return (rv);
}

int
filt_sowprocess(struct knote *kn, struct kevent *kev)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	sofilt_lock(so, &so->so_snd);
	rv = knote_process(kn, kev);
	sofilt_unlock(so, &so->so_snd);

	return (rv);
}

int
filt_sormodify(struct kevent *kev, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	sofilt_lock(so, &so->so_rcv);
	rv = knote_modify(kev, kn);
	sofilt_unlock(so, &so->so_rcv);

	return (rv);
}

int
filt_sorprocess(struct knote *kn, struct kevent *kev)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	sofilt_lock(so, &so->so_rcv);
	rv = knote_process(kn, kev);
	sofilt_unlock(so, &so->so_rcv);

	return (rv);
}
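/*
 * Illustrative userland sketch (not part of this file): as
 * filt_sowrite() above shows, a write filter with NOTE_LOWAT set fires
 * once at least "data" bytes of send buffer space are free, instead of
 * the socket's default low water mark (sb_lowat).  Names are
 * assumptions made for the example.
 *
 *	#include <sys/types.h>
 *	#include <sys/event.h>
 *
 *	int
 *	watch_writable(int kq, int s, int64_t bytes)
 *	{
 *		struct kevent kev;
 *
 *		EV_SET(&kev, s, EVFILT_WRITE, EV_ADD, NOTE_LOWAT,
 *		    bytes, NULL);
 *		return kevent(kq, &kev, 1, NULL, 0, NULL);
 *	}
 */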
#ifdef DDB
void
sobuf_print(struct sockbuf *,
    int (*)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))));

void
sobuf_print(struct sockbuf *sb,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	(*pr)("\tsb_cc: %lu\n", sb->sb_cc);
	(*pr)("\tsb_datacc: %lu\n", sb->sb_datacc);
	(*pr)("\tsb_hiwat: %lu\n", sb->sb_hiwat);
	(*pr)("\tsb_wat: %lu\n", sb->sb_wat);
	(*pr)("\tsb_mbcnt: %lu\n", sb->sb_mbcnt);
	(*pr)("\tsb_mbmax: %lu\n", sb->sb_mbmax);
	(*pr)("\tsb_lowat: %ld\n", sb->sb_lowat);
	(*pr)("\tsb_mb: %p\n", sb->sb_mb);
	(*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
	(*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
	(*pr)("\tsb_flags: %04x\n", sb->sb_flags);
	(*pr)("\tsb_state: %04x\n", sb->sb_state);
	(*pr)("\tsb_timeo_nsecs: %llu\n", sb->sb_timeo_nsecs);
}

void
so_print(void *v,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct socket *so = v;

	(*pr)("socket %p\n", so);
	(*pr)("so_type: %i\n", so->so_type);
	(*pr)("so_options: 0x%04x\n", so->so_options); /* %b */
	(*pr)("so_linger: %i\n", so->so_linger);
	(*pr)("so_state: 0x%04x\n", so->so_state);
	(*pr)("so_pcb: %p\n", so->so_pcb);
	(*pr)("so_proto: %p\n", so->so_proto);
	(*pr)("so_sigio: %p\n", so->so_sigio.sir_sigio);

	(*pr)("so_head: %p\n", so->so_head);
	(*pr)("so_onq: %p\n", so->so_onq);
	(*pr)("so_q0: @%p first: %p\n", &so->so_q0, TAILQ_FIRST(&so->so_q0));
	(*pr)("so_q: @%p first: %p\n", &so->so_q, TAILQ_FIRST(&so->so_q));
	(*pr)("so_eq: next: %p\n", TAILQ_NEXT(so, so_qe));
	(*pr)("so_q0len: %i\n", so->so_q0len);
	(*pr)("so_qlen: %i\n", so->so_qlen);
	(*pr)("so_qlimit: %i\n", so->so_qlimit);
	(*pr)("so_timeo: %i\n", so->so_timeo);
	(*pr)("so_oobmark: %lu\n", so->so_oobmark);

	(*pr)("so_sp: %p\n", so->so_sp);
	if (so->so_sp != NULL) {
		(*pr)("\tssp_socket: %p\n", so->so_sp->ssp_socket);
		(*pr)("\tssp_soback: %p\n", so->so_sp->ssp_soback);
		(*pr)("\tssp_len: %lld\n",
		    (unsigned long long)so->so_sp->ssp_len);
		(*pr)("\tssp_max: %lld\n",
		    (unsigned long long)so->so_sp->ssp_max);
		(*pr)("\tssp_idletv: %lld %ld\n", so->so_sp->ssp_idletv.tv_sec,
		    so->so_sp->ssp_idletv.tv_usec);
		(*pr)("\tssp_idleto: %spending (@%i)\n",
		    timeout_pending(&so->so_sp->ssp_idleto) ? "" : "not ",
		    so->so_sp->ssp_idleto.to_time);
	}

	(*pr)("so_rcv:\n");
	sobuf_print(&so->so_rcv, pr);
	(*pr)("so_snd:\n");
	sobuf_print(&so->so_snd, pr);

	(*pr)("so_upcall: %p so_upcallarg: %p\n",
	    so->so_upcall, so->so_upcallarg);

	(*pr)("so_euid: %d so_ruid: %d\n", so->so_euid, so->so_ruid);
	(*pr)("so_egid: %d so_rgid: %d\n", so->so_egid, so->so_rgid);
	(*pr)("so_cpid: %d\n", so->so_cpid);
}
#endif