/*
 * Copyright (c) 2004 Jeffrey M. Hsu.  All rights reserved.
 * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.24 2003/11/11 17:18:18 silby Exp $
 */

#include "opt_inet.h"
#include "opt_sctp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/file.h>			/* for struct knote */
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/socketops.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/jail.h>
#include <vm/vm_zone.h>
#include <vm/pmap.h>

#include <sys/thread2.h>
#include <sys/socketvar2.h>

#include <machine/limits.h>

extern int tcp_sosnd_agglim;
extern int tcp_sosnd_async;

#ifdef INET
static int do_setopt_accept_filter(struct socket *so, struct sockopt *sopt);
#endif /* INET */

static void	filt_sordetach(struct knote *kn);
static int	filt_soread(struct knote *kn, long hint);
static void	filt_sowdetach(struct knote *kn);
static int	filt_sowrite(struct knote *kn, long hint);
static int	filt_solisten(struct knote *kn, long hint);

static struct filterops solisten_filtops =
	{ FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sordetach, filt_solisten };
static struct filterops soread_filtops =
	{ FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sordetach, filt_soread };
static struct filterops sowrite_filtops =
	{ FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sowdetach, filt_sowrite };
static struct filterops soexcept_filtops =
	{ FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sordetach, filt_soread };

MALLOC_DEFINE(M_SOCKET, "socket", "socket struct");
MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW,
    &somaxconn, 0, "Maximum pending socket connection queue size");

/*
 * Socket operation routines.
 * These routines are called by the routines in sys_socket.c or from
 * a system process, and implement the semantics of socket operations
 * by switching out to the protocol specific routines.
 */

/*
 * Get a socket structure, and initialize it.
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
struct socket *
soalloc(int waitok)
{
	struct socket *so;
	unsigned waitmask;

	waitmask = waitok ? M_WAITOK : M_NOWAIT;
	so = kmalloc(sizeof(struct socket), M_SOCKET, M_ZERO|waitmask);
	if (so) {
		/* XXX race condition for reentrant kernel */
		TAILQ_INIT(&so->so_aiojobq);
		TAILQ_INIT(&so->so_rcv.ssb_kq.ki_mlist);
		TAILQ_INIT(&so->so_snd.ssb_kq.ki_mlist);
		lwkt_token_init(&so->so_rcv.ssb_token, "rcvtok");
		lwkt_token_init(&so->so_snd.ssb_token, "sndtok");
		so->so_state = SS_NOFDREF;
		so->so_refs = 1;
	}
	return so;
}

int
socreate(int dom, struct socket **aso, int type,
	int proto, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct protosw *prp;
	struct socket *so;
	struct pru_attach_info ai;
	int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);

	if (p->p_ucred->cr_prison && jail_socket_unixiproute_only &&
	    prp->pr_domain->dom_family != PF_LOCAL &&
	    prp->pr_domain->dom_family != PF_INET &&
	    prp->pr_domain->dom_family != PF_INET6 &&
	    prp->pr_domain->dom_family != PF_ROUTE) {
		return (EPROTONOSUPPORT);
	}

	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != 0);
	if (so == NULL)
		return (ENOBUFS);

	/*
	 * Callers of socreate() presumably will connect up a descriptor
	 * and call soclose() if they cannot.  This represents our so_refs
	 * (which should be 1) from soalloc().
	 */
	soclrstate(so, SS_NOFDREF);

	/*
	 * Set a default port for protocol processing.  No action will occur
	 * on the socket on this port until an inpcb is attached to it and
	 * is able to match incoming packets, or until the socket becomes
	 * available to userland.
	 *
	 * We normally default the socket to the protocol thread on cpu 0.
	 * If PR_SYNC_PORT is set (unix domain sockets) there is no protocol
	 * thread and all pr_*()/pru_*() calls are executed synchronously.
	 */
	if (prp->pr_flags & PR_SYNC_PORT)
		so->so_port = &netisr_sync_port;
	else
		so->so_port = cpu_portfn(0);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	so->so_cred = crhold(p->p_ucred);
	so->so_proto = prp;
	ai.sb_rlimit = &p->p_rlimit[RLIMIT_SBSIZE];
	ai.p_ucred = p->p_ucred;
	ai.fd_rdir = p->p_fd->fd_rdir;

	/*
	 * Auto-sizing of socket buffers is managed by the protocols and
	 * the appropriate flags must be set in the pru_attach function.
	 */
	error = so_pru_attach(so, proto, &ai);
	if (error) {
		sosetstate(so, SS_NOFDREF);
		sofree(so);	/* from soalloc */
		return error;
	}

	/*
	 * NOTE: Returns referenced socket.
	 */
	*aso = so;
	return (0);
}
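
/*
 * Illustrative sketch (not part of the build; the function name is
 * hypothetical): the (dom, type, proto) triple socreate() receives is
 * exactly what userland passes to socket(2).  With proto == 0 the
 * protocol is selected from the domain/type pair via pffindtype().
 */
#if 0
#include <sys/socket.h>

static int
example_socket_create(void)
{
	/* Both reach socreate(); the second lets the stack pick TCP. */
	int s1 = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	int s2 = socket(AF_INET, SOCK_STREAM, 0);

	return (s1 >= 0 && s2 >= 0) ? 0 : -1;
}
#endif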

int
sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error;

	error = so_pru_bind(so, nam, td);
	return (error);
}

static void
sodealloc(struct socket *so)
{
	if (so->so_rcv.ssb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_rcv.ssb_hiwat, 0, RLIM_INFINITY);
	if (so->so_snd.ssb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_snd.ssb_hiwat, 0, RLIM_INFINITY);
#ifdef INET
	/* remove accept filter if present */
	if (so->so_accf != NULL)
		do_setopt_accept_filter(so, NULL);
#endif /* INET */
	crfree(so->so_cred);
	kfree(so, M_SOCKET);
}

int
solisten(struct socket *so, int backlog, struct thread *td)
{
	int error;
#ifdef SCTP
	short oldopt, oldqlimit;
#endif /* SCTP */

	if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING))
		return (EINVAL);

#ifdef SCTP
	oldopt = so->so_options;
	oldqlimit = so->so_qlimit;
#endif /* SCTP */

	lwkt_gettoken(&so->so_rcv.ssb_token);
	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	lwkt_reltoken(&so->so_rcv.ssb_token);
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	/*
	 * SCTP needs to tweak both the inbound backlog parameter AND
	 * the so_options (UDP model both connect's and gets inbound
	 * connections .. implicitly).
	 */
	error = so_pru_listen(so, td);
	if (error) {
#ifdef SCTP
		/* Restore the params */
		so->so_options = oldopt;
		so->so_qlimit = oldqlimit;
#endif /* SCTP */
		return (error);
	}
	return (0);
}
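
/*
 * Illustrative sketch (not part of the build; the function name is
 * hypothetical): solisten() clamps out-of-range backlogs to the
 * kern.ipc.somaxconn sysctl, so a listen(2) with a negative or huge
 * backlog still succeeds with the clamped queue limit.
 */
#if 0
#include <sys/socket.h>

static int
example_listen(int s)
{
	/* a backlog of -1 (or anything > somaxconn) becomes somaxconn */
	return listen(s, -1);
}
#endif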
365 */ 366 lwkt_relpooltoken(head); 367 return; 368 } else { 369 panic("sofree: not queued"); 370 } 371 soclrstate(so, SS_INCOMP); 372 so->so_head = NULL; 373 lwkt_relpooltoken(head); 374 } 375 ssb_release(&so->so_snd, so); 376 sorflush(so); 377 sodealloc(so); 378 } 379 380 /* 381 * Close a socket on last file table reference removal. 382 * Initiate disconnect if connected. 383 * Free socket when disconnect complete. 384 */ 385 int 386 soclose(struct socket *so, int fflag) 387 { 388 int error = 0; 389 390 funsetown(&so->so_sigio); 391 if (so->so_pcb == NULL) 392 goto discard; 393 if (so->so_state & SS_ISCONNECTED) { 394 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 395 error = sodisconnect(so); 396 if (error) 397 goto drop; 398 } 399 if (so->so_options & SO_LINGER) { 400 if ((so->so_state & SS_ISDISCONNECTING) && 401 (fflag & FNONBLOCK)) 402 goto drop; 403 while (so->so_state & SS_ISCONNECTED) { 404 error = tsleep(&so->so_timeo, PCATCH, 405 "soclos", so->so_linger * hz); 406 if (error) 407 break; 408 } 409 } 410 } 411 drop: 412 if (so->so_pcb) { 413 int error2; 414 415 error2 = so_pru_detach(so); 416 if (error == 0) 417 error = error2; 418 } 419 discard: 420 lwkt_getpooltoken(so); 421 if (so->so_options & SO_ACCEPTCONN) { 422 struct socket *sp; 423 424 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { 425 TAILQ_REMOVE(&so->so_incomp, sp, so_list); 426 soclrstate(sp, SS_INCOMP); 427 sp->so_head = NULL; 428 so->so_incqlen--; 429 soaborta(sp); 430 } 431 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) { 432 TAILQ_REMOVE(&so->so_comp, sp, so_list); 433 soclrstate(sp, SS_COMP); 434 sp->so_head = NULL; 435 so->so_qlen--; 436 soaborta(sp); 437 } 438 } 439 lwkt_relpooltoken(so); 440 if (so->so_state & SS_NOFDREF) 441 panic("soclose: NOFDREF"); 442 sosetstate(so, SS_NOFDREF); /* take ref */ 443 444 /* Make sure all asychronous sending are done */ 445 so_pru_sync(so); 446 sofree(so); /* dispose of ref */ 447 return (error); 448 } 449 450 /* 451 * Abort and destroy a socket. Only one abort can be in progress 452 * at any given moment. 453 */ 454 void 455 soabort(struct socket *so) 456 { 457 soreference(so); 458 so_pru_abort(so); 459 } 460 461 void 462 soaborta(struct socket *so) 463 { 464 soreference(so); 465 so_pru_aborta(so); 466 } 467 468 void 469 soabort_oncpu(struct socket *so) 470 { 471 soreference(so); 472 so_pru_abort_oncpu(so); 473 } 474 475 /* 476 * so is passed in ref'd, which becomes owned by 477 * the cleared SS_NOFDREF flag. 478 */ 479 int 480 soaccept(struct socket *so, struct sockaddr **nam) 481 { 482 int error; 483 484 if ((so->so_state & SS_NOFDREF) == 0) 485 panic("soaccept: !NOFDREF"); 486 soclrstate(so, SS_NOFDREF); /* owned by lack of SS_NOFDREF */ 487 error = so_pru_accept_direct(so, nam); 488 return (error); 489 } 490 491 int 492 soconnect(struct socket *so, struct sockaddr *nam, struct thread *td) 493 { 494 int error; 495 496 if (so->so_options & SO_ACCEPTCONN) 497 return (EOPNOTSUPP); 498 /* 499 * If protocol is connection-based, can only connect once. 500 * Otherwise, if connected, try to disconnect first. 501 * This allows user to disconnect by connecting to, e.g., 502 * a null address. 503 */ 504 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 505 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 506 (error = sodisconnect(so)))) { 507 error = EISCONN; 508 } else { 509 /* 510 * Prevent accumulated error from previous connection 511 * from biting us. 
512 */ 513 so->so_error = 0; 514 error = so_pru_connect(so, nam, td); 515 } 516 return (error); 517 } 518 519 int 520 soconnect2(struct socket *so1, struct socket *so2) 521 { 522 int error; 523 524 error = so_pru_connect2(so1, so2); 525 return (error); 526 } 527 528 int 529 sodisconnect(struct socket *so) 530 { 531 int error; 532 533 if ((so->so_state & SS_ISCONNECTED) == 0) { 534 error = ENOTCONN; 535 goto bad; 536 } 537 if (so->so_state & SS_ISDISCONNECTING) { 538 error = EALREADY; 539 goto bad; 540 } 541 error = so_pru_disconnect(so); 542 bad: 543 return (error); 544 } 545 546 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 547 /* 548 * Send on a socket. 549 * If send must go all at once and message is larger than 550 * send buffering, then hard error. 551 * Lock against other senders. 552 * If must go all at once and not enough room now, then 553 * inform user that this would block and do nothing. 554 * Otherwise, if nonblocking, send as much as possible. 555 * The data to be sent is described by "uio" if nonzero, 556 * otherwise by the mbuf chain "top" (which must be null 557 * if uio is not). Data provided in mbuf chain must be small 558 * enough to send all at once. 559 * 560 * Returns nonzero on error, timeout or signal; callers 561 * must check for short counts if EINTR/ERESTART are returned. 562 * Data and control buffers are freed on return. 563 */ 564 int 565 sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, 566 struct mbuf *top, struct mbuf *control, int flags, 567 struct thread *td) 568 { 569 struct mbuf **mp; 570 struct mbuf *m; 571 size_t resid; 572 int space, len; 573 int clen = 0, error, dontroute, mlen; 574 int atomic = sosendallatonce(so) || top; 575 int pru_flags; 576 577 if (uio) { 578 resid = uio->uio_resid; 579 } else { 580 resid = (size_t)top->m_pkthdr.len; 581 #ifdef INVARIANTS 582 len = 0; 583 for (m = top; m; m = m->m_next) 584 len += m->m_len; 585 KKASSERT(top->m_pkthdr.len == len); 586 #endif 587 } 588 589 /* 590 * WARNING! resid is unsigned, space and len are signed. space 591 * can wind up negative if the sockbuf is overcommitted. 592 * 593 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 594 * type sockets since that's an error. 595 */ 596 if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) { 597 error = EINVAL; 598 goto out; 599 } 600 601 dontroute = 602 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 603 (so->so_proto->pr_flags & PR_ATOMIC); 604 if (td->td_lwp != NULL) 605 td->td_lwp->lwp_ru.ru_msgsnd++; 606 if (control) 607 clen = control->m_len; 608 #define gotoerr(errcode) { error = errcode; goto release; } 609 610 restart: 611 error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags)); 612 if (error) 613 goto out; 614 615 do { 616 if (so->so_state & SS_CANTSENDMORE) 617 gotoerr(EPIPE); 618 if (so->so_error) { 619 error = so->so_error; 620 so->so_error = 0; 621 goto release; 622 } 623 if ((so->so_state & SS_ISCONNECTED) == 0) { 624 /* 625 * `sendto' and `sendmsg' is allowed on a connection- 626 * based socket if it supports implied connect. 627 * Return ENOTCONN if not connected and no address is 628 * supplied. 629 */ 630 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 631 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 632 if ((so->so_state & SS_ISCONFIRMING) == 0 && 633 !(resid == 0 && clen != 0)) 634 gotoerr(ENOTCONN); 635 } else if (addr == 0) 636 gotoerr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 
				    ENOTCONN : EDESTADDRREQ);
		}
		if ((atomic && resid > so->so_snd.ssb_hiwat) ||
		    clen > so->so_snd.ssb_hiwat) {
			gotoerr(EMSGSIZE);
		}
		space = ssb_space(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((space < 0 || (size_t)space < resid + clen) && uio &&
		    (atomic || space < so->so_snd.ssb_lowat || space < clen)) {
			if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT))
				gotoerr(EWOULDBLOCK);
			ssb_unlock(&so->so_snd);
			error = ssb_wait(&so->so_snd);
			if (error)
				goto out;
			goto restart;
		}
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (resid > INT_MAX)
					resid = INT_MAX;
				m = m_getl((int)resid, MB_WAIT, MT_DATA,
					   top == NULL ? M_PKTHDR : 0, &mlen);
				if (top == NULL) {
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = NULL;
				}
				len = imin((int)szmin(mlen, resid), space);
				if (resid < MINCLSIZE) {
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				space -= len;
				error = uiomove(mtod(m, caddr_t), (size_t)len, uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid == 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			if (flags & MSG_OOB) {
				pru_flags = PRUS_OOB;
			} else if ((flags & MSG_EOF) &&
				   (so->so_proto->pr_flags & PR_IMPLOPCL) &&
				   (resid == 0)) {
				/*
				 * If the user set MSG_EOF, the protocol
				 * understands this flag and nothing left to
				 * send then use PRU_SEND_EOF instead of PRU_SEND.
				 */
				pru_flags = PRUS_EOF;
			} else if (resid > 0 && space > 0) {
				/* If there is more to send, set PRUS_MORETOCOME */
				pru_flags = PRUS_MORETOCOME;
			} else {
				pru_flags = 0;
			}
			/*
			 * XXX all the SS_CANTSENDMORE checks previously
			 * done could be out of date.  We could have received
			 * a reset packet in an interrupt or maybe we slept
			 * while doing page faults in uiomove() etc.  We could
			 * probably recheck again inside the splnet() protection
			 * here, but there are probably other places that this
			 * also happens.  We must rethink this.
			 */
			error = so_pru_send(so, pru_flags, top, addr, control, td);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = 0;
			top = NULL;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	ssb_unlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
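
/*
 * Illustrative sketch (not part of the build; the function name is
 * hypothetical): the "all at once" rule above as a datagram sender
 * sees it.  On a PR_ATOMIC protocol a message larger than the send
 * buffer fails immediately with EMSGSIZE rather than being split.
 */
#if 0
#include <sys/socket.h>
#include <errno.h>

static int
example_atomic_send(int udp_sock, const void *buf, size_t len,
    const struct sockaddr *to, socklen_t tolen)
{
	if (sendto(udp_sock, buf, len, 0, to, tolen) < 0 &&
	    errno == EMSGSIZE) {
		/* len exceeded so_snd.ssb_hiwat; shrink it or raise SO_SNDBUF */
		return -1;
	}
	return 0;
}
#endif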
760 */ 761 int 762 sosendudp(struct socket *so, struct sockaddr *addr, struct uio *uio, 763 struct mbuf *top, struct mbuf *control, int flags, struct thread *td) 764 { 765 boolean_t dontroute; /* temporary SO_DONTROUTE setting */ 766 size_t resid; 767 int error; 768 int space; 769 770 if (td->td_lwp != NULL) 771 td->td_lwp->lwp_ru.ru_msgsnd++; 772 if (control) 773 m_freem(control); 774 775 KASSERT((uio && !top) || (top && !uio), ("bad arguments to sosendudp")); 776 resid = uio ? uio->uio_resid : (size_t)top->m_pkthdr.len; 777 778 restart: 779 error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags)); 780 if (error) 781 goto out; 782 783 if (so->so_state & SS_CANTSENDMORE) 784 gotoerr(EPIPE); 785 if (so->so_error) { 786 error = so->so_error; 787 so->so_error = 0; 788 goto release; 789 } 790 if (!(so->so_state & SS_ISCONNECTED) && addr == NULL) 791 gotoerr(EDESTADDRREQ); 792 if (resid > so->so_snd.ssb_hiwat) 793 gotoerr(EMSGSIZE); 794 space = ssb_space(&so->so_snd); 795 if (uio && (space < 0 || (size_t)space < resid)) { 796 if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) 797 gotoerr(EWOULDBLOCK); 798 ssb_unlock(&so->so_snd); 799 error = ssb_wait(&so->so_snd); 800 if (error) 801 goto out; 802 goto restart; 803 } 804 805 if (uio) { 806 top = m_uiomove(uio); 807 if (top == NULL) 808 goto release; 809 } 810 811 dontroute = (flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE); 812 if (dontroute) 813 so->so_options |= SO_DONTROUTE; 814 815 error = so_pru_send(so, 0, top, addr, NULL, td); 816 top = NULL; /* sent or freed in lower layer */ 817 818 if (dontroute) 819 so->so_options &= ~SO_DONTROUTE; 820 821 release: 822 ssb_unlock(&so->so_snd); 823 out: 824 if (top) 825 m_freem(top); 826 return (error); 827 } 828 829 int 830 sosendtcp(struct socket *so, struct sockaddr *addr, struct uio *uio, 831 struct mbuf *top, struct mbuf *control, int flags, 832 struct thread *td) 833 { 834 struct mbuf **mp; 835 struct mbuf *m; 836 size_t resid; 837 int space, len; 838 int error, mlen; 839 int allatonce; 840 int pru_flags; 841 842 if (uio) { 843 KKASSERT(top == NULL); 844 allatonce = 0; 845 resid = uio->uio_resid; 846 } else { 847 allatonce = 1; 848 resid = (size_t)top->m_pkthdr.len; 849 #ifdef INVARIANTS 850 len = 0; 851 for (m = top; m; m = m->m_next) 852 len += m->m_len; 853 KKASSERT(top->m_pkthdr.len == len); 854 #endif 855 } 856 857 /* 858 * WARNING! resid is unsigned, space and len are signed. space 859 * can wind up negative if the sockbuf is overcommitted. 
860 * 861 * Also check to make sure that MSG_EOR isn't used on TCP 862 */ 863 if (flags & MSG_EOR) { 864 error = EINVAL; 865 goto out; 866 } 867 868 if (control) { 869 /* TCP doesn't do control messages (rights, creds, etc) */ 870 if (control->m_len) { 871 error = EINVAL; 872 goto out; 873 } 874 m_freem(control); /* empty control, just free it */ 875 control = NULL; 876 } 877 878 if (td->td_lwp != NULL) 879 td->td_lwp->lwp_ru.ru_msgsnd++; 880 881 #define gotoerr(errcode) { error = errcode; goto release; } 882 883 restart: 884 error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags)); 885 if (error) 886 goto out; 887 888 do { 889 if (so->so_state & SS_CANTSENDMORE) 890 gotoerr(EPIPE); 891 if (so->so_error) { 892 error = so->so_error; 893 so->so_error = 0; 894 goto release; 895 } 896 if ((so->so_state & SS_ISCONNECTED) == 0 && 897 (so->so_state & SS_ISCONFIRMING) == 0) 898 gotoerr(ENOTCONN); 899 if (allatonce && resid > so->so_snd.ssb_hiwat) 900 gotoerr(EMSGSIZE); 901 902 space = ssb_space(&so->so_snd); 903 if (flags & MSG_OOB) 904 space += 1024; 905 if ((space < 0 || (size_t)space < resid) && !allatonce && 906 space < so->so_snd.ssb_lowat) { 907 if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) 908 gotoerr(EWOULDBLOCK); 909 ssb_unlock(&so->so_snd); 910 error = ssb_wait(&so->so_snd); 911 if (error) 912 goto out; 913 goto restart; 914 } 915 mp = ⊤ 916 do { 917 int cnt = 0, async = 0; 918 919 if (uio == NULL) { 920 /* 921 * Data is prepackaged in "top". 922 */ 923 resid = 0; 924 } else do { 925 if (resid > INT_MAX) 926 resid = INT_MAX; 927 m = m_getl((int)resid, MB_WAIT, MT_DATA, 928 top == NULL ? M_PKTHDR : 0, &mlen); 929 if (top == NULL) { 930 m->m_pkthdr.len = 0; 931 m->m_pkthdr.rcvif = NULL; 932 } 933 len = imin((int)szmin(mlen, resid), space); 934 space -= len; 935 error = uiomove(mtod(m, caddr_t), (size_t)len, uio); 936 resid = uio->uio_resid; 937 m->m_len = len; 938 *mp = m; 939 top->m_pkthdr.len += len; 940 if (error) 941 goto release; 942 mp = &m->m_next; 943 if (resid == 0) 944 break; 945 ++cnt; 946 } while (space > 0 && cnt < tcp_sosnd_agglim); 947 948 if (tcp_sosnd_async) 949 async = 1; 950 951 if (flags & MSG_OOB) { 952 pru_flags = PRUS_OOB; 953 async = 0; 954 } else if ((flags & MSG_EOF) && resid == 0) { 955 pru_flags = PRUS_EOF; 956 } else if (resid > 0 && space > 0) { 957 /* If there is more to send, set PRUS_MORETOCOME */ 958 pru_flags = PRUS_MORETOCOME; 959 async = 1; 960 } else { 961 pru_flags = 0; 962 } 963 964 if (flags & MSG_SYNC) 965 async = 0; 966 967 /* 968 * XXX all the SS_CANTSENDMORE checks previously 969 * done could be out of date. We could have recieved 970 * a reset packet in an interrupt or maybe we slept 971 * while doing page faults in uiomove() etc. We could 972 * probably recheck again inside the splnet() protection 973 * here, but there are probably other places that this 974 * also happens. We must rethink this. 975 */ 976 if (!async) { 977 error = so_pru_send(so, pru_flags, top, 978 NULL, NULL, td); 979 } else { 980 so_pru_send_async(so, pru_flags, top, 981 NULL, NULL, td); 982 error = 0; 983 } 984 985 top = NULL; 986 mp = ⊤ 987 if (error) 988 goto release; 989 } while (resid && space > 0); 990 } while (resid); 991 992 release: 993 ssb_unlock(&so->so_snd); 994 out: 995 if (top) 996 m_freem(top); 997 if (control) 998 m_freem(control); 999 return (error); 1000 } 1001 1002 /* 1003 * Implement receive operations on a socket. 1004 * 1005 * We depend on the way that records are added to the signalsockbuf 1006 * by sbappend*. 
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 *
 * Although the signalsockbuf is locked, new data may still be appended.
 * A token inside the ssb_lock deals with MP issues and still allows
 * the network to access the socket if we block in a uio.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
	  struct sockbuf *sio, struct mbuf **controlp, int *flagsp)
{
	struct mbuf *m, *n;
	struct mbuf *free_chain = NULL;
	int flags, len, error, offset;
	struct protosw *pr = so->so_proto;
	int moff, type = 0;
	size_t resid, orig_resid;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = (size_t)(sio->sb_climit - sio->sb_cc);
	orig_resid = resid;

	if (psa)
		*psa = NULL;
	if (controlp)
		*controlp = NULL;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		m = m_get(MB_WAIT, MT_DATA);
		if (m == NULL)
			return (ENOBUFS);
		error = so_pru_rcvoob(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		if (sio) {
			do {
				sbappend(sio, m);
				KKASSERT(resid >= (size_t)m->m_len);
				resid -= (size_t)m->m_len;
			} while (resid > 0 && m);
		} else {
			do {
				uio->uio_resid = resid;
				error = uiomove(mtod(m, caddr_t),
						(int)szmin(resid, m->m_len),
						uio);
				resid = uio->uio_resid;
				m = m_free(m);
			} while (uio->uio_resid && error == 0 && m);
		}
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if ((so->so_state & SS_ISCONFIRMING) && resid)
		so_pru_rcvd(so, 0);

	/*
	 * The token interlocks against the protocol thread while
	 * ssb_lock is a blocking lock against other userland entities.
	 */
	lwkt_gettoken(&so->so_rcv.ssb_token);
restart:
	error = ssb_lock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		goto done;

	m = so->so_rcv.ssb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
1096 */ 1097 if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && 1098 (size_t)so->so_rcv.ssb_cc < resid) && 1099 (so->so_rcv.ssb_cc < so->so_rcv.ssb_lowat || 1100 ((flags & MSG_WAITALL) && resid <= (size_t)so->so_rcv.ssb_hiwat)) && 1101 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 1102 KASSERT(m != NULL || !so->so_rcv.ssb_cc, ("receive 1")); 1103 if (so->so_error) { 1104 if (m) 1105 goto dontblock; 1106 error = so->so_error; 1107 if ((flags & MSG_PEEK) == 0) 1108 so->so_error = 0; 1109 goto release; 1110 } 1111 if (so->so_state & SS_CANTRCVMORE) { 1112 if (m) 1113 goto dontblock; 1114 else 1115 goto release; 1116 } 1117 for (; m; m = m->m_next) { 1118 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 1119 m = so->so_rcv.ssb_mb; 1120 goto dontblock; 1121 } 1122 } 1123 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 1124 (pr->pr_flags & PR_CONNREQUIRED)) { 1125 error = ENOTCONN; 1126 goto release; 1127 } 1128 if (resid == 0) 1129 goto release; 1130 if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) { 1131 error = EWOULDBLOCK; 1132 goto release; 1133 } 1134 ssb_unlock(&so->so_rcv); 1135 error = ssb_wait(&so->so_rcv); 1136 if (error) 1137 goto done; 1138 goto restart; 1139 } 1140 dontblock: 1141 if (uio && uio->uio_td && uio->uio_td->td_proc) 1142 uio->uio_td->td_lwp->lwp_ru.ru_msgrcv++; 1143 1144 /* 1145 * note: m should be == sb_mb here. Cache the next record while 1146 * cleaning up. Note that calling m_free*() will break out critical 1147 * section. 1148 */ 1149 KKASSERT(m == so->so_rcv.ssb_mb); 1150 1151 /* 1152 * Skip any address mbufs prepending the record. 1153 */ 1154 if (pr->pr_flags & PR_ADDR) { 1155 KASSERT(m->m_type == MT_SONAME, ("receive 1a")); 1156 orig_resid = 0; 1157 if (psa) 1158 *psa = dup_sockaddr(mtod(m, struct sockaddr *)); 1159 if (flags & MSG_PEEK) 1160 m = m->m_next; 1161 else 1162 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain); 1163 } 1164 1165 /* 1166 * Skip any control mbufs prepending the record. 1167 */ 1168 #ifdef SCTP 1169 if (pr->pr_flags & PR_ADDR_OPT) { 1170 /* 1171 * For SCTP we may be getting a 1172 * whole message OR a partial delivery. 1173 */ 1174 if (m && m->m_type == MT_SONAME) { 1175 orig_resid = 0; 1176 if (psa) 1177 *psa = dup_sockaddr(mtod(m, struct sockaddr *)); 1178 if (flags & MSG_PEEK) 1179 m = m->m_next; 1180 else 1181 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain); 1182 } 1183 } 1184 #endif /* SCTP */ 1185 while (m && m->m_type == MT_CONTROL && error == 0) { 1186 if (flags & MSG_PEEK) { 1187 if (controlp) 1188 *controlp = m_copy(m, 0, m->m_len); 1189 m = m->m_next; /* XXX race */ 1190 } else { 1191 if (controlp) { 1192 n = sbunlinkmbuf(&so->so_rcv.sb, m, NULL); 1193 if (pr->pr_domain->dom_externalize && 1194 mtod(m, struct cmsghdr *)->cmsg_type == 1195 SCM_RIGHTS) 1196 error = (*pr->pr_domain->dom_externalize)(m); 1197 *controlp = m; 1198 m = n; 1199 } else { 1200 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain); 1201 } 1202 } 1203 if (controlp && *controlp) { 1204 orig_resid = 0; 1205 controlp = &(*controlp)->m_next; 1206 } 1207 } 1208 1209 /* 1210 * flag OOB data. 1211 */ 1212 if (m) { 1213 type = m->m_type; 1214 if (type == MT_OOBDATA) 1215 flags |= MSG_OOB; 1216 } 1217 1218 /* 1219 * Copy to the UIO or mbuf return chain (*mp). 
1220 */ 1221 moff = 0; 1222 offset = 0; 1223 while (m && resid > 0 && error == 0) { 1224 if (m->m_type == MT_OOBDATA) { 1225 if (type != MT_OOBDATA) 1226 break; 1227 } else if (type == MT_OOBDATA) 1228 break; 1229 else 1230 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, 1231 ("receive 3")); 1232 soclrstate(so, SS_RCVATMARK); 1233 len = (resid > INT_MAX) ? INT_MAX : resid; 1234 if (so->so_oobmark && len > so->so_oobmark - offset) 1235 len = so->so_oobmark - offset; 1236 if (len > m->m_len - moff) 1237 len = m->m_len - moff; 1238 1239 /* 1240 * Copy out to the UIO or pass the mbufs back to the SIO. 1241 * The SIO is dealt with when we eat the mbuf, but deal 1242 * with the resid here either way. 1243 */ 1244 if (uio) { 1245 uio->uio_resid = resid; 1246 error = uiomove(mtod(m, caddr_t) + moff, len, uio); 1247 resid = uio->uio_resid; 1248 if (error) 1249 goto release; 1250 } else { 1251 resid -= (size_t)len; 1252 } 1253 1254 /* 1255 * Eat the entire mbuf or just a piece of it 1256 */ 1257 if (len == m->m_len - moff) { 1258 if (m->m_flags & M_EOR) 1259 flags |= MSG_EOR; 1260 #ifdef SCTP 1261 if (m->m_flags & M_NOTIFICATION) 1262 flags |= MSG_NOTIFICATION; 1263 #endif /* SCTP */ 1264 if (flags & MSG_PEEK) { 1265 m = m->m_next; 1266 moff = 0; 1267 } else { 1268 if (sio) { 1269 n = sbunlinkmbuf(&so->so_rcv.sb, m, NULL); 1270 sbappend(sio, m); 1271 m = n; 1272 } else { 1273 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain); 1274 } 1275 } 1276 } else { 1277 if (flags & MSG_PEEK) { 1278 moff += len; 1279 } else { 1280 if (sio) { 1281 n = m_copym(m, 0, len, MB_WAIT); 1282 if (n) 1283 sbappend(sio, n); 1284 } 1285 m->m_data += len; 1286 m->m_len -= len; 1287 so->so_rcv.ssb_cc -= len; 1288 } 1289 } 1290 if (so->so_oobmark) { 1291 if ((flags & MSG_PEEK) == 0) { 1292 so->so_oobmark -= len; 1293 if (so->so_oobmark == 0) { 1294 sosetstate(so, SS_RCVATMARK); 1295 break; 1296 } 1297 } else { 1298 offset += len; 1299 if (offset == so->so_oobmark) 1300 break; 1301 } 1302 } 1303 if (flags & MSG_EOR) 1304 break; 1305 /* 1306 * If the MSG_WAITALL flag is set (for non-atomic socket), 1307 * we must not quit until resid == 0 or an error 1308 * termination. If a signal/timeout occurs, return 1309 * with a short count but without error. 1310 * Keep signalsockbuf locked against other readers. 1311 */ 1312 while ((flags & MSG_WAITALL) && m == NULL && 1313 resid > 0 && !sosendallatonce(so) && 1314 so->so_rcv.ssb_mb == NULL) { 1315 if (so->so_error || so->so_state & SS_CANTRCVMORE) 1316 break; 1317 /* 1318 * The window might have closed to zero, make 1319 * sure we send an ack now that we've drained 1320 * the buffer or we might end up blocking until 1321 * the idle takes over (5 seconds). 1322 */ 1323 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 1324 so_pru_rcvd(so, flags); 1325 error = ssb_wait(&so->so_rcv); 1326 if (error) { 1327 ssb_unlock(&so->so_rcv); 1328 error = 0; 1329 goto done; 1330 } 1331 m = so->so_rcv.ssb_mb; 1332 } 1333 } 1334 1335 /* 1336 * If an atomic read was requested but unread data still remains 1337 * in the record, set MSG_TRUNC. 1338 */ 1339 if (m && pr->pr_flags & PR_ATOMIC) 1340 flags |= MSG_TRUNC; 1341 1342 /* 1343 * Cleanup. If an atomic read was requested drop any unread data. 
1344 */ 1345 if ((flags & MSG_PEEK) == 0) { 1346 if (m && (pr->pr_flags & PR_ATOMIC)) 1347 sbdroprecord(&so->so_rcv.sb); 1348 if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) 1349 so_pru_rcvd(so, flags); 1350 } 1351 1352 if (orig_resid == resid && orig_resid && 1353 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 1354 ssb_unlock(&so->so_rcv); 1355 goto restart; 1356 } 1357 1358 if (flagsp) 1359 *flagsp |= flags; 1360 release: 1361 ssb_unlock(&so->so_rcv); 1362 done: 1363 lwkt_reltoken(&so->so_rcv.ssb_token); 1364 if (free_chain) 1365 m_freem(free_chain); 1366 return (error); 1367 } 1368 1369 /* 1370 * Shut a socket down. Note that we do not get a frontend lock as we 1371 * want to be able to shut the socket down even if another thread is 1372 * blocked in a read(), thus waking it up. 1373 */ 1374 int 1375 soshutdown(struct socket *so, int how) 1376 { 1377 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) 1378 return (EINVAL); 1379 1380 if (how != SHUT_WR) { 1381 /*ssb_lock(&so->so_rcv, M_WAITOK);*/ 1382 sorflush(so); 1383 /*ssb_unlock(&so->so_rcv);*/ 1384 } 1385 if (how != SHUT_RD) 1386 return (so_pru_shutdown(so)); 1387 return (0); 1388 } 1389 1390 void 1391 sorflush(struct socket *so) 1392 { 1393 struct signalsockbuf *ssb = &so->so_rcv; 1394 struct protosw *pr = so->so_proto; 1395 struct signalsockbuf asb; 1396 1397 atomic_set_int(&ssb->ssb_flags, SSB_NOINTR); 1398 1399 lwkt_gettoken(&ssb->ssb_token); 1400 socantrcvmore(so); 1401 asb = *ssb; 1402 1403 /* 1404 * Can't just blow up the ssb structure here 1405 */ 1406 bzero(&ssb->sb, sizeof(ssb->sb)); 1407 ssb->ssb_timeo = 0; 1408 ssb->ssb_lowat = 0; 1409 ssb->ssb_hiwat = 0; 1410 ssb->ssb_mbmax = 0; 1411 atomic_clear_int(&ssb->ssb_flags, SSB_CLEAR_MASK); 1412 1413 if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) 1414 (*pr->pr_domain->dom_dispose)(asb.ssb_mb); 1415 ssb_release(&asb, so); 1416 1417 lwkt_reltoken(&ssb->ssb_token); 1418 } 1419 1420 #ifdef INET 1421 static int 1422 do_setopt_accept_filter(struct socket *so, struct sockopt *sopt) 1423 { 1424 struct accept_filter_arg *afap = NULL; 1425 struct accept_filter *afp; 1426 struct so_accf *af = so->so_accf; 1427 int error = 0; 1428 1429 /* do not set/remove accept filters on non listen sockets */ 1430 if ((so->so_options & SO_ACCEPTCONN) == 0) { 1431 error = EINVAL; 1432 goto out; 1433 } 1434 1435 /* removing the filter */ 1436 if (sopt == NULL) { 1437 if (af != NULL) { 1438 if (af->so_accept_filter != NULL && 1439 af->so_accept_filter->accf_destroy != NULL) { 1440 af->so_accept_filter->accf_destroy(so); 1441 } 1442 if (af->so_accept_filter_str != NULL) { 1443 FREE(af->so_accept_filter_str, M_ACCF); 1444 } 1445 FREE(af, M_ACCF); 1446 so->so_accf = NULL; 1447 } 1448 so->so_options &= ~SO_ACCEPTFILTER; 1449 return (0); 1450 } 1451 /* adding a filter */ 1452 /* must remove previous filter first */ 1453 if (af != NULL) { 1454 error = EINVAL; 1455 goto out; 1456 } 1457 /* don't put large objects on the kernel stack */ 1458 MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP, M_WAITOK); 1459 error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap); 1460 afap->af_name[sizeof(afap->af_name)-1] = '\0'; 1461 afap->af_arg[sizeof(afap->af_arg)-1] = '\0'; 1462 if (error) 1463 goto out; 1464 afp = accept_filt_get(afap->af_name); 1465 if (afp == NULL) { 1466 error = ENOENT; 1467 goto out; 1468 } 1469 MALLOC(af, struct so_accf *, sizeof(*af), M_ACCF, M_WAITOK | M_ZERO); 1470 if (afp->accf_create != NULL) { 1471 if (afap->af_name[0] != '\0') { 1472 

#ifdef INET
static int
do_setopt_accept_filter(struct socket *so, struct sockopt *sopt)
{
	struct accept_filter_arg *afap = NULL;
	struct accept_filter *afp;
	struct so_accf *af = so->so_accf;
	int error = 0;

	/* do not set/remove accept filters on non listen sockets */
	if ((so->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto out;
	}

	/* removing the filter */
	if (sopt == NULL) {
		if (af != NULL) {
			if (af->so_accept_filter != NULL &&
			    af->so_accept_filter->accf_destroy != NULL) {
				af->so_accept_filter->accf_destroy(so);
			}
			if (af->so_accept_filter_str != NULL) {
				FREE(af->so_accept_filter_str, M_ACCF);
			}
			FREE(af, M_ACCF);
			so->so_accf = NULL;
		}
		so->so_options &= ~SO_ACCEPTFILTER;
		return (0);
	}
	/* adding a filter */
	/* must remove previous filter first */
	if (af != NULL) {
		error = EINVAL;
		goto out;
	}
	/* don't put large objects on the kernel stack */
	MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP,
	       M_WAITOK);
	error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
	afap->af_name[sizeof(afap->af_name)-1] = '\0';
	afap->af_arg[sizeof(afap->af_arg)-1] = '\0';
	if (error)
		goto out;
	afp = accept_filt_get(afap->af_name);
	if (afp == NULL) {
		error = ENOENT;
		goto out;
	}
	MALLOC(af, struct so_accf *, sizeof(*af), M_ACCF, M_WAITOK | M_ZERO);
	if (afp->accf_create != NULL) {
		if (afap->af_name[0] != '\0') {
			int len = strlen(afap->af_name) + 1;

			MALLOC(af->so_accept_filter_str, char *, len, M_ACCF,
			       M_WAITOK);
			strcpy(af->so_accept_filter_str, afap->af_name);
		}
		af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg);
		if (af->so_accept_filter_arg == NULL) {
			FREE(af->so_accept_filter_str, M_ACCF);
			FREE(af, M_ACCF);
			so->so_accf = NULL;
			error = EINVAL;
			goto out;
		}
	}
	af->so_accept_filter = afp;
	so->so_accf = af;
	so->so_options |= SO_ACCEPTFILTER;
out:
	if (afap != NULL)
		FREE(afap, M_TEMP);
	return (error);
}
#endif /* INET */

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
int
sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
{
	return soopt_to_kbuf(sopt, buf, len, minlen);
}

int
soopt_to_kbuf(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
{
	size_t valsize;

	KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
	KKASSERT(kva_p(buf));

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return EINVAL;
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	bcopy(sopt->sopt_val, buf, valsize);
	return 0;
}
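
/*
 * Illustrative sketch (not part of the build; the function name is
 * hypothetical): soopt_to_kbuf()'s length rules as seen from
 * setsockopt(2).  A value shorter than the option's minimum size
 * yields EINVAL; an oversized value is accepted and only the first
 * sizeof(int) bytes are copied in.
 */
#if 0
#include <sys/socket.h>

static int
example_setsockopt_len(int s)
{
	int on = 1;
	long oversized = 1;

	/* exact size: fine */
	if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0)
		return -1;
	/* oversized value: the excess beyond sizeof(int) is ignored */
	return setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &oversized,
	    sizeof(oversized));
}
#endif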
1601 */ 1602 if (optval < 1) { 1603 error = EINVAL; 1604 goto bad; 1605 } 1606 1607 switch (sopt->sopt_name) { 1608 case SO_SNDBUF: 1609 case SO_RCVBUF: 1610 if (ssb_reserve(sopt->sopt_name == SO_SNDBUF ? 1611 &so->so_snd : &so->so_rcv, (u_long)optval, 1612 so, 1613 &curproc->p_rlimit[RLIMIT_SBSIZE]) == 0) { 1614 error = ENOBUFS; 1615 goto bad; 1616 } 1617 sotmp = (sopt->sopt_name == SO_SNDBUF) ? 1618 &so->so_snd : &so->so_rcv; 1619 atomic_clear_int(&sotmp->ssb_flags, 1620 SSB_AUTOSIZE); 1621 break; 1622 1623 /* 1624 * Make sure the low-water is never greater than 1625 * the high-water. 1626 */ 1627 case SO_SNDLOWAT: 1628 so->so_snd.ssb_lowat = 1629 (optval > so->so_snd.ssb_hiwat) ? 1630 so->so_snd.ssb_hiwat : optval; 1631 atomic_clear_int(&so->so_snd.ssb_flags, 1632 SSB_AUTOLOWAT); 1633 break; 1634 case SO_RCVLOWAT: 1635 so->so_rcv.ssb_lowat = 1636 (optval > so->so_rcv.ssb_hiwat) ? 1637 so->so_rcv.ssb_hiwat : optval; 1638 atomic_clear_int(&so->so_rcv.ssb_flags, 1639 SSB_AUTOLOWAT); 1640 break; 1641 } 1642 break; 1643 1644 case SO_SNDTIMEO: 1645 case SO_RCVTIMEO: 1646 error = sooptcopyin(sopt, &tv, sizeof tv, 1647 sizeof tv); 1648 if (error) 1649 goto bad; 1650 1651 /* assert(hz > 0); */ 1652 if (tv.tv_sec < 0 || tv.tv_sec > INT_MAX / hz || 1653 tv.tv_usec < 0 || tv.tv_usec >= 1000000) { 1654 error = EDOM; 1655 goto bad; 1656 } 1657 /* assert(tick > 0); */ 1658 /* assert(ULONG_MAX - INT_MAX >= 1000000); */ 1659 val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / ustick; 1660 if (val > INT_MAX) { 1661 error = EDOM; 1662 goto bad; 1663 } 1664 if (val == 0 && tv.tv_usec != 0) 1665 val = 1; 1666 1667 switch (sopt->sopt_name) { 1668 case SO_SNDTIMEO: 1669 so->so_snd.ssb_timeo = val; 1670 break; 1671 case SO_RCVTIMEO: 1672 so->so_rcv.ssb_timeo = val; 1673 break; 1674 } 1675 break; 1676 default: 1677 error = ENOPROTOOPT; 1678 break; 1679 } 1680 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1681 (void) so_pr_ctloutput(so, sopt); 1682 } 1683 } 1684 bad: 1685 return (error); 1686 } 1687 1688 /* Helper routine for getsockopt */ 1689 int 1690 sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) 1691 { 1692 soopt_from_kbuf(sopt, buf, len); 1693 return 0; 1694 } 1695 1696 void 1697 soopt_from_kbuf(struct sockopt *sopt, const void *buf, size_t len) 1698 { 1699 size_t valsize; 1700 1701 if (len == 0) { 1702 sopt->sopt_valsize = 0; 1703 return; 1704 } 1705 1706 KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val)); 1707 KKASSERT(kva_p(buf)); 1708 1709 /* 1710 * Documented get behavior is that we always return a value, 1711 * possibly truncated to fit in the user's buffer. 1712 * Traditional behavior is that we always tell the user 1713 * precisely how much we copied, rather than something useful 1714 * like the total amount we had available for her. 1715 * Note that this interface is not idempotent; the entire answer must 1716 * generated ahead of time. 
1717 */ 1718 valsize = szmin(len, sopt->sopt_valsize); 1719 sopt->sopt_valsize = valsize; 1720 if (sopt->sopt_val != 0) { 1721 bcopy(buf, sopt->sopt_val, valsize); 1722 } 1723 } 1724 1725 int 1726 sogetopt(struct socket *so, struct sockopt *sopt) 1727 { 1728 int error, optval; 1729 long optval_l; 1730 struct linger l; 1731 struct timeval tv; 1732 #ifdef INET 1733 struct accept_filter_arg *afap; 1734 #endif 1735 1736 error = 0; 1737 sopt->sopt_dir = SOPT_GET; 1738 if (sopt->sopt_level != SOL_SOCKET) { 1739 if (so->so_proto && so->so_proto->pr_ctloutput) { 1740 return (so_pr_ctloutput(so, sopt)); 1741 } else 1742 return (ENOPROTOOPT); 1743 } else { 1744 switch (sopt->sopt_name) { 1745 #ifdef INET 1746 case SO_ACCEPTFILTER: 1747 if ((so->so_options & SO_ACCEPTCONN) == 0) 1748 return (EINVAL); 1749 MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), 1750 M_TEMP, M_WAITOK | M_ZERO); 1751 if ((so->so_options & SO_ACCEPTFILTER) != 0) { 1752 strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name); 1753 if (so->so_accf->so_accept_filter_str != NULL) 1754 strcpy(afap->af_arg, so->so_accf->so_accept_filter_str); 1755 } 1756 error = sooptcopyout(sopt, afap, sizeof(*afap)); 1757 FREE(afap, M_TEMP); 1758 break; 1759 #endif /* INET */ 1760 1761 case SO_LINGER: 1762 l.l_onoff = so->so_options & SO_LINGER; 1763 l.l_linger = so->so_linger; 1764 error = sooptcopyout(sopt, &l, sizeof l); 1765 break; 1766 1767 case SO_USELOOPBACK: 1768 case SO_DONTROUTE: 1769 case SO_DEBUG: 1770 case SO_KEEPALIVE: 1771 case SO_REUSEADDR: 1772 case SO_REUSEPORT: 1773 case SO_BROADCAST: 1774 case SO_OOBINLINE: 1775 case SO_TIMESTAMP: 1776 optval = so->so_options & sopt->sopt_name; 1777 integer: 1778 error = sooptcopyout(sopt, &optval, sizeof optval); 1779 break; 1780 1781 case SO_TYPE: 1782 optval = so->so_type; 1783 goto integer; 1784 1785 case SO_ERROR: 1786 optval = so->so_error; 1787 so->so_error = 0; 1788 goto integer; 1789 1790 case SO_SNDBUF: 1791 optval = so->so_snd.ssb_hiwat; 1792 goto integer; 1793 1794 case SO_RCVBUF: 1795 optval = so->so_rcv.ssb_hiwat; 1796 goto integer; 1797 1798 case SO_SNDLOWAT: 1799 optval = so->so_snd.ssb_lowat; 1800 goto integer; 1801 1802 case SO_RCVLOWAT: 1803 optval = so->so_rcv.ssb_lowat; 1804 goto integer; 1805 1806 case SO_SNDTIMEO: 1807 case SO_RCVTIMEO: 1808 optval = (sopt->sopt_name == SO_SNDTIMEO ? 1809 so->so_snd.ssb_timeo : so->so_rcv.ssb_timeo); 1810 1811 tv.tv_sec = optval / hz; 1812 tv.tv_usec = (optval % hz) * ustick; 1813 error = sooptcopyout(sopt, &tv, sizeof tv); 1814 break; 1815 1816 case SO_SNDSPACE: 1817 optval_l = ssb_space(&so->so_snd); 1818 error = sooptcopyout(sopt, &optval_l, sizeof(optval_l)); 1819 break; 1820 1821 default: 1822 error = ENOPROTOOPT; 1823 break; 1824 } 1825 return (error); 1826 } 1827 } 1828 1829 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */ 1830 int 1831 soopt_getm(struct sockopt *sopt, struct mbuf **mp) 1832 { 1833 struct mbuf *m, *m_prev; 1834 int sopt_size = sopt->sopt_valsize, msize; 1835 1836 m = m_getl(sopt_size, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT, MT_DATA, 1837 0, &msize); 1838 if (m == NULL) 1839 return (ENOBUFS); 1840 m->m_len = min(msize, sopt_size); 1841 sopt_size -= m->m_len; 1842 *mp = m; 1843 m_prev = m; 1844 1845 while (sopt_size > 0) { 1846 m = m_getl(sopt_size, sopt->sopt_td ? 
			   MT_DATA, 0, &msize);
		if (m == NULL) {
			m_freem(*mp);
			return (ENOBUFS);
		}
		m->m_len = min(msize, sopt_size);
		sopt_size -= m->m_len;
		m_prev->m_next = m;
		m_prev = m;
	}
	return (0);
}

/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
	soopt_to_mbuf(sopt, m);
	return 0;
}

void
soopt_to_mbuf(struct sockopt *sopt, struct mbuf *m)
{
	size_t valsize;
	void *val;

	KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
	KKASSERT(kva_p(m));
	if (sopt->sopt_val == NULL)
		return;
	val = sopt->sopt_val;
	valsize = sopt->sopt_valsize;
	while (m != NULL && valsize >= m->m_len) {
		bcopy(val, mtod(m, char *), m->m_len);
		valsize -= m->m_len;
		val = (caddr_t)val + m->m_len;
		m = m->m_next;
	}
	if (m != NULL) /* enough should have been allocated at ip6_sooptmcopyin() */
		panic("ip6_sooptmcopyin");
}

/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
	return soopt_from_mbuf(sopt, m);
}

int
soopt_from_mbuf(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;
	size_t valsize = 0;
	size_t maxsize;
	void *val;

	KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
	KKASSERT(kva_p(m));
	if (sopt->sopt_val == NULL)
		return 0;
	val = sopt->sopt_val;
	maxsize = sopt->sopt_valsize;
	while (m != NULL && maxsize >= m->m_len) {
		bcopy(mtod(m, char *), val, m->m_len);
		maxsize -= m->m_len;
		val = (caddr_t)val + m->m_len;
		valsize += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) {
		/* enough soopt buffer should be given from user-land */
		m_freem(m0);
		return (EINVAL);
	}
	sopt->sopt_valsize = valsize;
	return 0;
}

void
sohasoutofband(struct socket *so)
{
	if (so->so_sigio != NULL)
		pgsigio(so->so_sigio, SIGURG, 0);
	KNOTE(&so->so_rcv.ssb_kq.ki_note, NOTE_OOB);
}

int
sokqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	struct signalsockbuf *ssb;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		ssb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		ssb = &so->so_snd;
		break;
	case EVFILT_EXCEPT:
		kn->kn_fop = &soexcept_filtops;
		ssb = &so->so_rcv;
		break;
	default:
		return (EOPNOTSUPP);
	}

	knote_insert(&ssb->ssb_kq.ki_note, kn);
	atomic_set_int(&ssb->ssb_flags, SSB_KNOTE);
	return (0);
}
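
/*
 * Illustrative sketch (not part of the build; the function name is
 * hypothetical): registering the filters installed by sokqfilter()
 * from userland.  EVFILT_READ on a listening socket selects
 * solisten_filtops (ready means completed connections); NOTE_LOWAT in
 * fflags overrides the receive low-water mark via kn_sdata, as
 * filt_soread() checks below.
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>

static int
example_kevent_register(int kq, int lsock, int dsock)
{
	struct kevent kev[2];

	EV_SET(&kev[0], lsock, EVFILT_READ, EV_ADD, 0, 0, NULL);
	/* data socket: fire only once at least 128 bytes are buffered */
	EV_SET(&kev[1], dsock, EVFILT_READ, EV_ADD, NOTE_LOWAT, 128, NULL);
	return kevent(kq, kev, 2, NULL, 0, NULL);
}
#endif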

static void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	knote_remove(&so->so_rcv.ssb_kq.ki_note, kn);
	if (SLIST_EMPTY(&so->so_rcv.ssb_kq.ki_note))
		atomic_clear_int(&so->so_rcv.ssb_flags, SSB_KNOTE);
}

/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	if (kn->kn_sfflags & NOTE_OOB) {
		if ((so->so_oobmark || (so->so_state & SS_RCVATMARK))) {
			kn->kn_fflags |= NOTE_OOB;
			return (1);
		}
		return (0);
	}
	kn->kn_data = so->so_rcv.ssb_cc;

	if (so->so_state & SS_CANTRCVMORE) {
		/*
		 * Only set NODATA if all data has been exhausted.
		 */
		if (kn->kn_data == 0)
			kn->kn_flags |= EV_NODATA;
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return ((kn->kn_data >= so->so_rcv.ssb_lowat) ||
		!TAILQ_EMPTY(&so->so_comp));
}

static void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	knote_remove(&so->so_snd.ssb_kq.ki_note, kn);
	if (SLIST_EMPTY(&so->so_snd.ssb_kq.ki_note))
		atomic_clear_int(&so->so_snd.ssb_flags, SSB_KNOTE);
}

/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = ssb_space(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA);
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.ssb_lowat);
}

/*ARGSUSED*/
static int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (!TAILQ_EMPTY(&so->so_comp));
}