/*	$NetBSD: uipc_socket.c,v 1.80 2003/05/03 17:53:17 yamt Exp $	*/

/*-
 * Copyright (c) 2002 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.6 (Berkeley) 5/2/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.80 2003/05/03 17:53:17 yamt Exp $");

#include "opt_sock_counters.h"
#include "opt_sosend_loan.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>
#include <sys/event.h>

#include <uvm/uvm.h>

struct pool	socket_pool;

MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options");
MALLOC_DEFINE(M_SONAME, "soname", "socket name");

extern int	somaxconn;		/* patchable (XXX sysctl) */
int		somaxconn = SOMAXCONN;

#ifdef SOSEND_COUNTERS
#include <sys/device.h>

struct evcnt sosend_loan_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "loan big");
struct evcnt sosend_copy_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "copy big");
struct evcnt sosend_copy_small = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "copy small");
struct evcnt sosend_kvalimit = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "kva limit");

#define	SOSEND_COUNTER_INCR(ev)		(ev)->ev_count++

#else

#define	SOSEND_COUNTER_INCR(ev)		/* nothing */

#endif /* SOSEND_COUNTERS */

void
soinit(void)
{

	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
	    "sockpl", NULL);

#ifdef SOSEND_COUNTERS
	evcnt_attach_static(&sosend_loan_big);
	evcnt_attach_static(&sosend_copy_big);
	evcnt_attach_static(&sosend_copy_small);
	evcnt_attach_static(&sosend_kvalimit);
#endif /* SOSEND_COUNTERS */
}

#ifdef SOSEND_NO_LOAN
int use_sosend_loan = 0;
#else
int use_sosend_loan = 1;
#endif

struct mbuf *so_pendfree;

int somaxkva = 16 * 1024 * 1024;
int socurkva;
int sokvawaiters;

#define	SOCK_LOAN_THRESH	4096
#define	SOCK_LOAN_CHUNK		65536

static size_t sodopendfree(struct socket *);

vaddr_t
sokvaalloc(vsize_t len, struct socket *so)
{
	vaddr_t lva;
	int s;

	while (socurkva + len > somaxkva) {
		if (sodopendfree(so))
			continue;
		SOSEND_COUNTER_INCR(&sosend_kvalimit);
		s = splvm();
		sokvawaiters++;
		(void) tsleep(&socurkva, PVM, "sokva", 0);
		sokvawaiters--;
		splx(s);
	}

	lva = uvm_km_valloc_wait(kernel_map, len);
	if (lva == 0)
		return (0);
	socurkva += len;

	return lva;
}

void
sokvafree(vaddr_t sva, vsize_t len)
{

	uvm_km_free(kernel_map, sva, len);
	socurkva -= len;
	if (sokvawaiters)
		wakeup(&socurkva);
}
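/*
 * Pages loaned to the socket layer by sosend_loan() occupy a bounded
 * region of kernel KVA (somaxkva, 16MB by default); sokvaalloc() above
 * sleeps when that quota is exhausted.  A loaned mbuf cannot always be
 * torn down at the point it is freed, so soloanfree() queues it on
 * so_pendfree/so->so_pendfree and those lists are drained later by
 * sodopendfree().  sodoloanfree() below does the actual unmap and unloan.
 */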
static void
sodoloanfree(struct vm_page **pgs, caddr_t buf, size_t size)
{
	vaddr_t va, sva, eva;
	vsize_t len;
	paddr_t pa;
	int i, npgs;

	eva = round_page((vaddr_t) buf + size);
	sva = trunc_page((vaddr_t) buf);
	len = eva - sva;
	npgs = len >> PAGE_SHIFT;

	if (__predict_false(pgs == NULL)) {
		pgs = alloca(npgs * sizeof(*pgs));

		for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) {
			if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
				panic("sodoloanfree: va 0x%lx not mapped", va);
			pgs[i] = PHYS_TO_VM_PAGE(pa);
		}
	}

	pmap_kremove(sva, len);
	pmap_update(pmap_kernel());
	uvm_unloan(pgs, npgs, UVM_LOAN_TOPAGE);
	sokvafree(sva, len);
}

static size_t
sodopendfree(struct socket *so)
{
	struct mbuf *m;
	size_t rv = 0;
	int s;

	s = splvm();

	for (;;) {
		m = so_pendfree;
		if (m == NULL)
			break;
		so_pendfree = m->m_next;
		splx(s);

		rv += m->m_ext.ext_size;
		sodoloanfree((m->m_flags & M_EXT_PAGES) ?
		    m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf,
		    m->m_ext.ext_size);
		s = splvm();
		pool_cache_put(&mbpool_cache, m);
	}

	for (;;) {
		m = so->so_pendfree;
		if (m == NULL)
			break;
		so->so_pendfree = m->m_next;
		splx(s);

		rv += m->m_ext.ext_size;
		sodoloanfree((m->m_flags & M_EXT_PAGES) ?
		    m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf,
		    m->m_ext.ext_size);
		s = splvm();
		pool_cache_put(&mbpool_cache, m);
	}

	splx(s);
	return (rv);
}

void
soloanfree(struct mbuf *m, caddr_t buf, size_t size, void *arg)
{
	struct socket *so = arg;
	int s;

	if (m == NULL) {
		sodoloanfree(NULL, buf, size);
		return;
	}

	s = splvm();
	m->m_next = so->so_pendfree;
	so->so_pendfree = m;
	splx(s);
	if (sokvawaiters)
		wakeup(&socurkva);
}

static long
sosend_loan(struct socket *so, struct uio *uio, struct mbuf *m, long space)
{
	struct iovec *iov = uio->uio_iov;
	vaddr_t sva, eva;
	vsize_t len;
	vaddr_t lva, va;
	int npgs, i, error;

	if (uio->uio_segflg != UIO_USERSPACE)
		return (0);

	if (iov->iov_len < (size_t) space)
		space = iov->iov_len;
	if (space > SOCK_LOAN_CHUNK)
		space = SOCK_LOAN_CHUNK;

	eva = round_page((vaddr_t) iov->iov_base + space);
	sva = trunc_page((vaddr_t) iov->iov_base);
	len = eva - sva;
	npgs = len >> PAGE_SHIFT;

	/* XXX KDASSERT */
	KASSERT(npgs <= M_EXT_MAXPAGES);

	lva = sokvaalloc(len, so);
	if (lva == 0)
		return 0;

	error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len,
	    m->m_ext.ext_pgs, UVM_LOAN_TOPAGE);
	if (error) {
		sokvafree(lva, len);
		return (0);
	}

	for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE)
		pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]),
		    VM_PROT_READ);
	pmap_update(pmap_kernel());

	lva += (vaddr_t) iov->iov_base & PAGE_MASK;

	MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so);
	m->m_flags |= M_EXT_PAGES | M_EXT_ROMAP;

	uio->uio_resid -= space;
	/* uio_offset not updated, not set/used for write(2) */
	uio->uio_iov->iov_base = (caddr_t) uio->uio_iov->iov_base + space;
	uio->uio_iov->iov_len -= space;
	if (uio->uio_iov->iov_len == 0) {
		uio->uio_iov++;
		uio->uio_iovcnt--;
	}

	return (space);
}
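/*
 * Note on the loan path above: sosend_loan() wires the user's pages
 * read-only into kernel KVA and attaches them to the mbuf as external
 * storage, avoiding a copy for writes of at least SOCK_LOAN_THRESH
 * (4096) bytes, in chunks of at most SOCK_LOAN_CHUNK (65536) bytes.
 * A failed loan simply returns 0 and the caller falls back to copying.
 */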
/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
/*ARGSUSED*/
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc	*p;
	struct protosw	*prp;
	struct socket	*so;
	int		error, s;

	p = curproc;		/* XXX */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK);
	memset((caddr_t)so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	so->so_proto = prp;
	so->so_send = sosend;
	so->so_receive = soreceive;
#ifdef MBUFTRACE
	so->so_rcv.sb_mowner = &prp->pr_domain->dom_mowner;
	so->so_snd.sb_mowner = &prp->pr_domain->dom_mowner;
	so->so_mowner = &prp->pr_domain->dom_mowner;
#endif
	if (p != 0)
		so->so_uid = p->p_ucred->cr_uid;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
	    (struct mbuf *)(long)proto, (struct mbuf *)0, p);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
	splx(s);
	*aso = so;
	return (0);
}

int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	int	s, error;

	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
	    nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}

int
solisten(struct socket *so, int backlog)
{
	int	s, error;

	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_EMPTY(&so->so_q))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0)
		backlog = 0;
	so->so_qlimit = min(backlog, somaxconn);
	splx(s);
	return (0);
}

void
sofree(struct socket *so)
{
	struct mbuf *m;

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	while ((m = so->so_pendfree) != NULL) {
		so->so_pendfree = m->m_next;
		m->m_next = so_pendfree;
		so_pendfree = m;
	}
	pool_put(&socket_pool, so);
}
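/*
 * Note on the close path below: with SO_LINGER set, soclose() sleeps
 * (interruptibly, PCATCH) for up to so_linger * hz ticks waiting for a
 * graceful disconnect; on a nonblocking socket it drops the connection
 * at once.  Illustrative userland counterpart (not part of this file):
 *
 *	struct linger l = { .l_onoff = 1, .l_linger = 5 };
 *	setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
 *	close(s);		// may block up to 5 seconds
 */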
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so)
{
	struct socket	*so2;
	int		s, error;

	error = 0;
	s = splsoftnet();		/* conservative */
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != 0) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != 0) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
 drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
		    (struct proc *)0);
		if (error == 0)
			error = error2;
	}
 discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splsoftnet...
 */
int
soabort(struct socket *so)
{

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int	s, error;

	error = 0;
	s = splsoftnet();
	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0);
	else
		error = ECONNABORTED;

	splx(s);
	return (error);
}
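/*
 * Note on soconnect() below: connection-based protocols may connect only
 * once, while datagram sockets may be re-connected, and an existing
 * association can be dissolved by connecting to a null address.
 * Illustrative userland counterpart (not part of this file):
 *
 *	struct sockaddr sa = { .sa_family = AF_UNSPEC };
 *	connect(s, &sa, sizeof(sa));	// disconnect a UDP socket
 */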
int
soconnect(struct socket *so, struct mbuf *nam)
{
	struct proc	*p;
	int		s, error;

	p = curproc;		/* XXX */
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int	s, error;

	s = splsoftnet();
	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
	    (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
	    (struct proc *)0);
	splx(s);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int	s, error;

	s = splsoftnet();
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
	    (struct proc *)0);
 bad:
	splx(s);
	sodopendfree(so);
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
	struct mbuf *control, int flags)
{
	struct proc	*p;
	struct mbuf	**mp, *m;
	long		space, len, resid, clen, mlen;
	int		error, s, dontroute, atomic;

	sodopendfree(so);

	p = curproc;		/* XXX */
	clen = 0;
	atomic = sosendallatonce(so) || top;
	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

 restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (top == 0) {
					m = m_gethdr(M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					m = m_get(M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				MCLAIM(m, so->so_snd.sb_mowner);
				if (use_sosend_loan &&
				    uio->uio_iov->iov_len >= SOCK_LOAN_THRESH &&
				    space >= SOCK_LOAN_THRESH &&
				    (len = sosend_loan(so, uio, m,
				    space)) != 0) {
					SOSEND_COUNTER_INCR(&sosend_loan_big);
					space -= len;
					goto have_data;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					SOSEND_COUNTER_INCR(&sosend_copy_big);
					m_clget(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
					if (atomic && top == 0) {
						len = lmin(MCLBYTES - max_hdr,
						    resid);
						m->m_data += max_hdr;
					} else
						len = lmin(MCLBYTES, resid);
					space -= len;
				} else {
 nopages:
					SOSEND_COUNTER_INCR(&sosend_copy_small);
					len = lmin(lmin(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len,
				    uio);
 have_data:
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);

			s = splsoftnet();

			if (so->so_state & SS_CANTSENDMORE)
				snderr(EPIPE);

			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			if (resid > 0)
				so->so_state |= SS_MORETOCOME;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, p);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			if (resid > 0)
				so->so_state &= ~SS_MORETOCOME;
			splx(s);

			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

 release:
	sbunlock(&so->so_snd);
 out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
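/*
 * Note on sosend() above: splsoftnet() is held only while the socket
 * state and buffer space are examined and while the mbuf chain is handed
 * to the protocol; the uiomove() copies (and page loans) run with the
 * priority level restored.  MSG_OOB sends may overcommit the send buffer
 * by 1024 bytes so urgent data can bypass a full buffer.
 */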
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
	struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	struct mbuf	*m, **mp;
	int		flags, len, error, s, offset, moff, type, orig_resid;
	struct protosw	*pr;
	struct mbuf	*nextrecord;
	int		mbuf_removed = 0;

	pr = so->so_proto;
	mp = mp0;
	type = 0;
	orig_resid = uio->uio_resid;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;

	if ((flags & MSG_DONTWAIT) == 0)
		sodopendfree(so);

	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
		    (struct proc *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
 bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);

 restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
 dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * While we process the initial mbufs containing address and control
	 * info, we save a copy of m->m_nextpkt into nextrecord.
	 */
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			mbuf_removed = 1;
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			mbuf_removed = 1;
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
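
	/*
	 * At this point the leading MT_SONAME and MT_CONTROL mbufs of the
	 * record have been consumed (or copied, for MSG_PEEK); m now points
	 * at the first data mbuf of the record, or is NULL if the record
	 * carried no data.  Records are chained through m_nextpkt, mbufs
	 * within a record through m_next.
	 */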

	/*
	 * If m is non-NULL, we have some data to read.  From now on,
	 * make sure to keep sb_lastrecord consistent when working on
	 * the last packet on the chain (nextrecord == NULL) and we
	 * change m->m_nextpkt.
	 */
	if (m) {
		if ((flags & MSG_PEEK) == 0) {
			m->m_nextpkt = nextrecord;
			/*
			 * If nextrecord == NULL (this is a single chain),
			 * then sb_lastrecord may not be valid here if m
			 * was changed earlier.
			 */
			if (nextrecord == NULL) {
				KASSERT(so->so_rcv.sb_mb == m);
				so->so_rcv.sb_lastrecord = m;
			}
		}
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	} else {
		if ((flags & MSG_PEEK) == 0) {
			KASSERT(so->so_rcv.sb_mb == m);
			so->so_rcv.sb_mb = nextrecord;
			SB_EMPTY_FIXUP(&so->so_rcv);
		}
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splsoftnet();
			if (error) {
				/*
				 * If any part of the record has been removed
				 * (such as the MT_SONAME mbuf, which will
				 * happen when PR_ADDR, and thus also
				 * PR_ATOMIC, is set), then drop the entire
				 * record to maintain the atomicity of the
				 * receive operation.
				 *
				 * This avoids a later panic("receive 1a")
				 * when compiled with DIAGNOSTIC.
				 */
				if (m && mbuf_removed
				    && (pr->pr_flags & PR_ATOMIC))
					(void) sbdroprecord(&so->so_rcv);

				goto release;
			}
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			/*
			 * If we are peeking and the socket receive buffer is
			 * full, stop since we can't get more data to peek at.
			 */
			if ((flags & MSG_PEEK) && sbspace(&so->so_rcv) <= 0)
				break;
			/*
			 * If we've drained the socket buffer, tell the
			 * protocol in case it needs to do something to
			 * get it filled again.
			 */
			if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb)
				(*pr->pr_usrreq)(so, PRU_RCVD,
				    (struct mbuf *)0,
				    (struct mbuf *)(long)flags,
				    (struct mbuf *)0,
				    (struct proc *)0);
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct proc *)0);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
 release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

int
soshutdown(struct socket *so, int how)
{
	struct protosw	*pr;

	pr = so->so_proto;
	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
		return (EINVAL);

	if (how == SHUT_RD || how == SHUT_RDWR)
		sorflush(so);
	if (how == SHUT_WR || how == SHUT_RDWR)
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	return (0);
}

void
sorflush(struct socket *so)
{
	struct sockbuf	*sb, asb;
	struct protosw	*pr;
	int		s;

	sb = &so->so_rcv;
	pr = so->so_proto;
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splnet();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	memset((caddr_t)sb, 0, sizeof(*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
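/*
 * Socket-level option processing.  sosetopt()/sogetopt() below handle
 * SOL_SOCKET options directly and hand anything else (and, for set,
 * successfully handled socket-level options as well) to the protocol's
 * pr_ctloutput hook.  Illustrative userland counterpart (not part of
 * this file):
 *
 *	int on = 1;
 *	setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
 */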
int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
{
	int		error;
	struct mbuf	*m;

	error = 0;
	m = m0;
	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof(struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof(*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			if (tv->tv_sec > (SHRT_MAX - tv->tv_usec / tick) / hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;
			if (val == 0 && tv->tv_usec != 0)
				val = 1;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
 bad:
	if (m)
		(void) m_free(m);
	return (error);
}
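/*
 * Timeouts are stored in clock ticks: sosetopt() above folds a struct
 * timeval into a short as tv_sec * hz + tv_usec / tick, rounding a
 * nonzero tv_usec up to at least 1 tick.  With hz = 100 (so tick =
 * 10000 microseconds), for example, a 1.5 second timeout becomes
 * 100 + 50 = 150 ticks; sogetopt() below converts back the same way.
 */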
int
sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
{
	struct mbuf	*m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof(int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof(struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

void
sohasoutofband(struct socket *so)
{
	struct proc *p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}

static void
filt_sordetach(struct knote *kn)
{
	struct socket	*so;

	so = (struct socket *)kn->kn_fp->f_data;
	SLIST_REMOVE(&so->so_rcv.sb_sel.sel_klist, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.sel_klist))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
}

/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
	struct socket	*so;

	so = (struct socket *)kn->kn_fp->f_data;
	kn->kn_data = so->so_rcv.sb_cc;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

static void
filt_sowdetach(struct knote *kn)
{
	struct socket	*so;

	so = (struct socket *)kn->kn_fp->f_data;
	SLIST_REMOVE(&so->so_snd.sb_sel.sel_klist, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.sel_klist))
		so->so_snd.sb_flags &= ~SB_KNOTE;
}

/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket	*so;

	so = (struct socket *)kn->kn_fp->f_data;
	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}
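/*
 * kqueue(2) glue.  EVFILT_READ on a listening socket reports pending
 * connections via filt_solisten() below; on other sockets the read and
 * write filters report buffered byte counts against the buffers'
 * low-water marks (or a NOTE_LOWAT threshold).  Illustrative usage
 * (not part of this file):
 *
 *	struct kevent ev;
 *	EV_SET(&ev, s, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);
 */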
/*ARGSUSED*/
static int
filt_solisten(struct knote *kn, long hint)
{
	struct socket	*so;

	so = (struct socket *)kn->kn_fp->f_data;

	/*
	 * Set kn_data to number of incoming connections, not
	 * counting partial (incomplete) connections.
	 */
	kn->kn_data = so->so_qlen;
	return (kn->kn_data > 0);
}

static const struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
static const struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
static const struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };

int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket	*so;
	struct sockbuf	*sb;

	so = (struct socket *)kn->kn_fp->f_data;
	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (1);
	}
	SLIST_INSERT_HEAD(&sb->sb_sel.sel_klist, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;
	return (0);
}