/*	$NetBSD: uipc_socket.c,v 1.91 2003/10/21 22:55:47 thorpej Exp $	*/

/*-
 * Copyright (c) 2002 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.6 (Berkeley) 5/2/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.91 2003/10/21 22:55:47 thorpej Exp $");

#include "opt_sock_counters.h"
#include "opt_sosend_loan.h"
#include "opt_mbuftrace.h"
#include "opt_somaxkva.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>
#include <sys/event.h>
#include <sys/poll.h>

#include <uvm/uvm.h>

struct pool	socket_pool;

MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options");
MALLOC_DEFINE(M_SONAME, "soname", "socket name");

extern int	somaxconn;		/* patchable (XXX sysctl) */
int		somaxconn = SOMAXCONN;

#ifdef SOSEND_COUNTERS
#include <sys/device.h>

struct evcnt sosend_loan_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "loan big");
struct evcnt sosend_copy_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "copy big");
struct evcnt sosend_copy_small = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "copy small");
struct evcnt sosend_kvalimit = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "kva limit");

#define	SOSEND_COUNTER_INCR(ev)		(ev)->ev_count++

#else

#define	SOSEND_COUNTER_INCR(ev)		/* nothing */

#endif /* SOSEND_COUNTERS */

void
soinit(void)
{

	/* Set the initial adjusted socket buffer size. */
	if (sb_max_set(sb_max))
		panic("bad initial sb_max value: %lu\n", sb_max);

	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
	    "sockpl", NULL);

#ifdef SOSEND_COUNTERS
	evcnt_attach_static(&sosend_loan_big);
	evcnt_attach_static(&sosend_copy_big);
	evcnt_attach_static(&sosend_copy_small);
	evcnt_attach_static(&sosend_kvalimit);
#endif /* SOSEND_COUNTERS */
}

#ifdef SOSEND_NO_LOAN
int use_sosend_loan = 0;
#else
int use_sosend_loan = 1;
#endif

struct mbuf *so_pendfree;

#ifndef SOMAXKVA
#define	SOMAXKVA (16 * 1024 * 1024)
#endif
int somaxkva = SOMAXKVA;
int socurkva;
int sokvawaiters;

#define	SOCK_LOAN_THRESH	4096
#define	SOCK_LOAN_CHUNK		65536

static size_t sodopendfree(struct socket *);

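/*
 * sokvaalloc() reserves kernel virtual address space for a loaned
 * send buffer, throttled by somaxkva.  A caller that would push
 * socurkva past the limit first tries to reclaim space by flushing
 * the pending-free list, then sleeps on &socurkva until sokvafree()
 * wakes it.
 */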
vaddr_t
sokvaalloc(vsize_t len, struct socket *so)
{
	vaddr_t lva;
	int s;

	while (socurkva + len > somaxkva) {
		if (sodopendfree(so))
			continue;
		SOSEND_COUNTER_INCR(&sosend_kvalimit);
		s = splvm();
		sokvawaiters++;
		(void) tsleep(&socurkva, PVM, "sokva", 0);
		sokvawaiters--;
		splx(s);
	}

	lva = uvm_km_valloc_wait(kernel_map, len);
	if (lva == 0)
		return (0);
	socurkva += len;

	return lva;
}

void
sokvafree(vaddr_t sva, vsize_t len)
{

	uvm_km_free(kernel_map, sva, len);
	socurkva -= len;
	if (sokvawaiters)
		wakeup(&socurkva);
}

static void
sodoloanfree(struct vm_page **pgs, caddr_t buf, size_t size)
{
	vaddr_t va, sva, eva;
	vsize_t len;
	paddr_t pa;
	int i, npgs;

	eva = round_page((vaddr_t) buf + size);
	sva = trunc_page((vaddr_t) buf);
	len = eva - sva;
	npgs = len >> PAGE_SHIFT;

	if (__predict_false(pgs == NULL)) {
		pgs = alloca(npgs * sizeof(*pgs));

		for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) {
			if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
				panic("sodoloanfree: va 0x%lx not mapped", va);
			pgs[i] = PHYS_TO_VM_PAGE(pa);
		}
	}

	pmap_kremove(sva, len);
	pmap_update(pmap_kernel());
	uvm_unloan(pgs, npgs, UVM_LOAN_TOPAGE);
	sokvafree(sva, len);
}

static size_t
sodopendfree(struct socket *so)
{
	struct mbuf *m;
	size_t rv = 0;
	int s;

	s = splvm();

	for (;;) {
		m = so_pendfree;
		if (m == NULL)
			break;
		so_pendfree = m->m_next;
		splx(s);

		rv += m->m_ext.ext_size;
		sodoloanfree((m->m_flags & M_EXT_PAGES) ?
		    m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf,
		    m->m_ext.ext_size);
		s = splvm();
		pool_cache_put(&mbpool_cache, m);
	}

	for (;;) {
		m = so->so_pendfree;
		if (m == NULL)
			break;
		so->so_pendfree = m->m_next;
		splx(s);

		rv += m->m_ext.ext_size;
		sodoloanfree((m->m_flags & M_EXT_PAGES) ?
		    m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf,
		    m->m_ext.ext_size);
		s = splvm();
		pool_cache_put(&mbpool_cache, m);
	}

	splx(s);
	return (rv);
}

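/*
 * soloanfree() is installed via MEXTADD() as the external-storage
 * free routine for loaned mbufs.  Since it may run from interrupt
 * context, it only queues the mbuf on the per-socket pending-free
 * list; the unmapping and uvm_unloan() work is deferred to
 * sodopendfree(), which runs in process context.
 */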
void
soloanfree(struct mbuf *m, caddr_t buf, size_t size, void *arg)
{
	struct socket *so = arg;
	int s;

	if (m == NULL) {
		sodoloanfree(NULL, buf, size);
		return;
	}

	s = splvm();
	m->m_next = so->so_pendfree;
	so->so_pendfree = m;
	splx(s);
	if (sokvawaiters)
		wakeup(&socurkva);
}

static long
sosend_loan(struct socket *so, struct uio *uio, struct mbuf *m, long space)
{
	struct iovec *iov = uio->uio_iov;
	vaddr_t sva, eva;
	vsize_t len;
	vaddr_t lva, va;
	int npgs, i, error;

	if (uio->uio_segflg != UIO_USERSPACE)
		return (0);

	if (iov->iov_len < (size_t) space)
		space = iov->iov_len;
	if (space > SOCK_LOAN_CHUNK)
		space = SOCK_LOAN_CHUNK;

	eva = round_page((vaddr_t) iov->iov_base + space);
	sva = trunc_page((vaddr_t) iov->iov_base);
	len = eva - sva;
	npgs = len >> PAGE_SHIFT;

	/* XXX KDASSERT */
	KASSERT(npgs <= M_EXT_MAXPAGES);

	lva = sokvaalloc(len, so);
	if (lva == 0)
		return 0;

	error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len,
	    m->m_ext.ext_pgs, UVM_LOAN_TOPAGE);
	if (error) {
		sokvafree(lva, len);
		return (0);
	}

	for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE)
		pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]),
		    VM_PROT_READ);
	pmap_update(pmap_kernel());

	lva += (vaddr_t) iov->iov_base & PAGE_MASK;

	MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so);
	m->m_flags |= M_EXT_PAGES | M_EXT_ROMAP;

	uio->uio_resid -= space;
	/* uio_offset not updated, not set/used for write(2) */
	uio->uio_iov->iov_base = (caddr_t) uio->uio_iov->iov_base + space;
	uio->uio_iov->iov_len -= space;
	if (uio->uio_iov->iov_len == 0) {
		uio->uio_iov++;
		uio->uio_iovcnt--;
	}

	return (space);
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
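/*
 * A minimal sketch of how an in-kernel caller drives these routines
 * in sequence to set up a passive stream socket.  Illustrative only,
 * not taken verbatim from sys_socket.c; error handling and the
 * construction of "nam" (an mbuf holding a struct sockaddr_in) are
 * elided:
 *
 *	struct socket *so;
 *	int error;
 *
 *	error = socreate(AF_INET, &so, SOCK_STREAM, 0);
 *	if (error == 0)
 *		error = sobind(so, nam, curproc);
 *	if (error == 0)
 *		error = solisten(so, 5);
 */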
/*ARGSUSED*/
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p;
	struct protosw *prp;
	struct socket *so;
	int error, s;

	p = curproc;		/* XXX */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK);
	memset((caddr_t)so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	so->so_proto = prp;
	so->so_send = sosend;
	so->so_receive = soreceive;
#ifdef MBUFTRACE
	so->so_rcv.sb_mowner = &prp->pr_domain->dom_mowner;
	so->so_snd.sb_mowner = &prp->pr_domain->dom_mowner;
	so->so_mowner = &prp->pr_domain->dom_mowner;
#endif
	if (p != 0)
		so->so_uid = p->p_ucred->cr_uid;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
	    (struct mbuf *)(long)proto, (struct mbuf *)0, p);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
	splx(s);
	*aso = so;
	return (0);
}

int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	int s, error;

	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
	    nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}

int
solisten(struct socket *so, int backlog)
{
	int s, error;

	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_EMPTY(&so->so_q))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0)
		backlog = 0;
	so->so_qlimit = min(backlog, somaxconn);
	splx(s);
	return (0);
}

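/*
 * Free a socket once it has no file descriptor reference and no pcb.
 * A socket still on a listening socket's accept queue is left alone;
 * any mbufs on the per-socket pending-free list are handed over to
 * the global so_pendfree list before the socket itself is returned
 * to the pool.
 */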
void
sofree(struct socket *so)
{
	struct mbuf *m;

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	while ((m = so->so_pendfree) != NULL) {
		so->so_pendfree = m->m_next;
		m->m_next = so_pendfree;
		so_pendfree = m;
	}
	pool_put(&socket_pool, so);
}

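/*
 * With SO_LINGER set, soclose() below sleeps for up to so_linger
 * seconds (so_linger * hz ticks per tsleep) waiting for the
 * disconnect to complete; e.g. a linger time of 5 allows roughly
 * five seconds for queued data to drain before the socket is torn
 * down.
 */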
/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so)
{
	struct socket *so2;
	int s, error;

	error = 0;
	s = splsoftnet();		/* conservative */
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != 0) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != 0) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
 drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
		    (struct proc *)0);
		if (error == 0)
			error = error2;
	}
 discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splsoftnet...
 */
int
soabort(struct socket *so)
{

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int s, error;

	error = 0;
	s = splsoftnet();
	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0);
	else
		error = ECONNABORTED;

	splx(s);
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	struct proc *p;
	int s, error;

	p = curproc;		/* XXX */
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int s, error;

	s = splsoftnet();
	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
	    (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
	    (struct proc *)0);
	splx(s);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int s, error;

	s = splsoftnet();
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
	    (struct proc *)0);
 bad:
	splx(s);
	sodopendfree(so);
	return (error);
}

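/*
 * Map MSG_DONTWAIT onto the sblock() wait flag: a non-blocking
 * request must not sleep waiting for the socket buffer lock.
 */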
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
	struct mbuf *control, int flags)
{
	struct proc *p;
	struct mbuf **mp, *m;
	long space, len, resid, clen, mlen;
	int error, s, dontroute, atomic;

	sodopendfree(so);

	p = curproc;		/* XXX */
	clen = 0;
	atomic = sosendallatonce(so) || top;
	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

 restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
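		/*
		 * Fill mbufs with data from the uio.  Each pass chooses,
		 * in order of preference: a zero-copy page loan for large
		 * user-space writes (sosend_loan()), a cluster copy for
		 * writes of at least MINCLSIZE, or a plain copy into the
		 * mbuf's internal storage.
		 */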
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (top == 0) {
					m = m_gethdr(M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					m = m_get(M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				MCLAIM(m, so->so_snd.sb_mowner);
				if (use_sosend_loan &&
				    uio->uio_iov->iov_len >= SOCK_LOAN_THRESH &&
				    space >= SOCK_LOAN_THRESH &&
				    (len = sosend_loan(so, uio, m,
				    space)) != 0) {
					SOSEND_COUNTER_INCR(&sosend_loan_big);
					space -= len;
					goto have_data;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					SOSEND_COUNTER_INCR(&sosend_copy_big);
					m_clget(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
					if (atomic && top == 0) {
						len = lmin(MCLBYTES - max_hdr,
						    resid);
						m->m_data += max_hdr;
					} else
						len = lmin(MCLBYTES, resid);
					space -= len;
				} else {
 nopages:
					SOSEND_COUNTER_INCR(&sosend_copy_small);
					len = lmin(lmin(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len,
				    uio);
 have_data:
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);

			s = splsoftnet();

			if (so->so_state & SS_CANTSENDMORE)
				snderr(EPIPE);

			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			if (resid > 0)
				so->so_state |= SS_MORETOCOME;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, p);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			if (resid > 0)
				so->so_state &= ~SS_MORETOCOME;
			splx(s);

			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

 release:
	sbunlock(&so->so_snd);
 out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
	struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	struct mbuf *m, **mp;
	int flags, len, error, s, offset, moff, type, orig_resid;
	struct protosw *pr;
	struct mbuf *nextrecord;
	int mbuf_removed = 0;

	pr = so->so_proto;
	mp = mp0;
	type = 0;
	orig_resid = uio->uio_resid;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;

	if ((flags & MSG_DONTWAIT) == 0)
		sodopendfree(so);

	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
		    (struct proc *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
 bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);

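	/*
	 * The receive path below may unlock the socket buffer, sleep in
	 * sbwait(), and come back here to re-evaluate the sockbuf from
	 * scratch, so no state from a previous pass may be trusted after
	 * "restart".
	 */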
 restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
 dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * While we process the initial mbufs containing address and control
	 * info, we save a copy of m->m_nextpkt into nextrecord.
	 */
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			mbuf_removed = 1;
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			mbuf_removed = 1;
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}

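	/*
	 * Note: for SCM_RIGHTS control messages (descriptor passing over
	 * local-domain sockets), dom_externalize() above converts the
	 * in-kernel file references carried in the mbuf into file
	 * descriptors in the receiving process before the message is
	 * handed up.
	 */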
	/*
	 * If m is non-NULL, we have some data to read.  From now on,
	 * make sure to keep sb_lastrecord consistent when working on
	 * the last packet on the chain (nextrecord == NULL) and we
	 * change m->m_nextpkt.
	 */
	if (m) {
		if ((flags & MSG_PEEK) == 0) {
			m->m_nextpkt = nextrecord;
			/*
			 * If nextrecord == NULL (this is a single chain),
			 * then sb_lastrecord may not be valid here if m
			 * was changed earlier.
			 */
			if (nextrecord == NULL) {
				KASSERT(so->so_rcv.sb_mb == m);
				so->so_rcv.sb_lastrecord = m;
			}
		}
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	} else {
		if ((flags & MSG_PEEK) == 0) {
			KASSERT(so->so_rcv.sb_mb == m);
			so->so_rcv.sb_mb = nextrecord;
			SB_EMPTY_FIXUP(&so->so_rcv);
		}
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splsoftnet();
			if (error) {
				/*
				 * If any part of the record has been removed
				 * (such as the MT_SONAME mbuf, which will
				 * happen when PR_ADDR, and thus also
				 * PR_ATOMIC, is set), then drop the entire
				 * record to maintain the atomicity of the
				 * receive operation.
				 *
				 * This avoids a later panic("receive 1a")
				 * when compiled with DIAGNOSTIC.
				 */
				if (m && mbuf_removed
				    && (pr->pr_flags & PR_ATOMIC))
					(void) sbdroprecord(&so->so_rcv);

				goto release;
			}
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			/*
			 * If we are peeking and the socket receive buffer is
			 * full, stop since we can't get more data to peek at.
			 */
			if ((flags & MSG_PEEK) && sbspace(&so->so_rcv) <= 0)
				break;
			/*
			 * If we've drained the socket buffer, tell the
			 * protocol in case it needs to do something to
			 * get it filled again.
			 */
			if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb)
				(*pr->pr_usrreq)(so, PRU_RCVD,
				    (struct mbuf *)0,
				    (struct mbuf *)(long)flags,
				    (struct mbuf *)0,
				    (struct proc *)0);
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct proc *)0);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
 release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

int
soshutdown(struct socket *so, int how)
{
	struct protosw *pr;

	pr = so->so_proto;
	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
		return (EINVAL);

	if (how == SHUT_RD || how == SHUT_RDWR)
		sorflush(so);
	if (how == SHUT_WR || how == SHUT_RDWR)
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	return (0);
}

void
sorflush(struct socket *so)
{
	struct sockbuf *sb, asb;
	struct protosw *pr;
	int s;

	sb = &so->so_rcv;
	pr = so->so_proto;
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splnet();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	/*
	 * Clear most of the sockbuf structure, but leave some of the
	 * fields valid.
	 */
	memset(&sb->sb_startzero, 0,
	    sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

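/*
 * Set a socket-level option.  Note that SO_LINGER stores l_linger
 * and then falls through to the boolean cases: struct linger begins
 * with l_onoff, so the shared "*mtod(m, int *)" test below turns the
 * SO_LINGER bit in so_options on or off as well.
 */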
int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
{
	int error;
	struct mbuf *m;

	error = 0;
	m = m0;
	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof(struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

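		/*
		 * The timeout is converted from a struct timeval to
		 * clock ticks: val = tv_sec * hz + tv_usec / tick.
		 * For example, with hz = 100 and tick = 10000, a
		 * timeout of { 1, 500000 } becomes 100 + 50 = 150
		 * ticks.  Values too large for the short sb_timeo
		 * field are rejected with EDOM.
		 */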
		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof(*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			if (tv->tv_sec > (SHRT_MAX - tv->tv_usec / tick) / hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;
			if (val == 0 && tv->tv_usec != 0)
				val = 1;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
 bad:
	if (m)
		(void) m_free(m);
	return (error);
}

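/*
 * Return the current value of a socket-level option in a freshly
 * allocated mbuf.  Options below the socket level are delegated to
 * the protocol's pr_ctloutput.  Tick-based timeouts are converted
 * back to a struct timeval, mirroring the conversion in sosetopt().
 */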
int
sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
{
	struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof(int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof(struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

void
sohasoutofband(struct socket *so)
{
	fownsignal(so->so_pgid, SIGURG, POLL_PRI, POLLPRI|POLLRDBAND, so);
	selwakeup(&so->so_rcv.sb_sel);
}

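/*
 * kqueue(2) support: the filters below attach knotes to the send or
 * receive sockbuf's selinfo klist.  EVFILT_READ on a listening
 * socket reports the accept queue length instead of buffered data.
 */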
static void
filt_sordetach(struct knote *kn)
{
	struct socket *so;

	so = (struct socket *)kn->kn_fp->f_data;
	SLIST_REMOVE(&so->so_rcv.sb_sel.sel_klist, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.sel_klist))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
}

/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so;

	so = (struct socket *)kn->kn_fp->f_data;
	kn->kn_data = so->so_rcv.sb_cc;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

static void
filt_sowdetach(struct knote *kn)
{
	struct socket *so;

	so = (struct socket *)kn->kn_fp->f_data;
	SLIST_REMOVE(&so->so_snd.sb_sel.sel_klist, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.sel_klist))
		so->so_snd.sb_flags &= ~SB_KNOTE;
}

/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so;

	so = (struct socket *)kn->kn_fp->f_data;
	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*ARGSUSED*/
static int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so;

	so = (struct socket *)kn->kn_fp->f_data;

	/*
	 * Set kn_data to number of incoming connections, not
	 * counting partial (incomplete) connections.
	 */
	kn->kn_data = so->so_qlen;
	return (kn->kn_data > 0);
}

static const struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
static const struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
static const struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };

int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so;
	struct sockbuf *sb;

	so = (struct socket *)kn->kn_fp->f_data;
	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (1);
	}
	SLIST_INSERT_HEAD(&sb->sb_sel.sel_klist, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;
	return (0);
}