/*	$NetBSD: uipc_socket.c,v 1.85 2003/08/07 16:31:59 agc Exp $	*/

/*-
 * Copyright (c) 2002 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.6 (Berkeley) 5/2/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.85 2003/08/07 16:31:59 agc Exp $");

#include "opt_sock_counters.h"
#include "opt_sosend_loan.h"
#include "opt_mbuftrace.h"
#include "opt_somaxkva.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>
#include <sys/event.h>

#include <uvm/uvm.h>

struct pool socket_pool;

MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options");
MALLOC_DEFINE(M_SONAME, "soname", "socket name");

extern int somaxconn;			/* patchable (XXX sysctl) */
int somaxconn = SOMAXCONN;

#ifdef SOSEND_COUNTERS
#include <sys/device.h>

struct evcnt sosend_loan_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "loan big");
struct evcnt sosend_copy_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "copy big");
struct evcnt sosend_copy_small = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "copy small");
struct evcnt sosend_kvalimit = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "sosend", "kva limit");

#define	SOSEND_COUNTER_INCR(ev)	(ev)->ev_count++

#else

#define	SOSEND_COUNTER_INCR(ev)	/* nothing */

#endif /* SOSEND_COUNTERS */

void
soinit(void)
{

	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
	    "sockpl", NULL);

#ifdef SOSEND_COUNTERS
	evcnt_attach_static(&sosend_loan_big);
	evcnt_attach_static(&sosend_copy_big);
	evcnt_attach_static(&sosend_copy_small);
	evcnt_attach_static(&sosend_kvalimit);
#endif /* SOSEND_COUNTERS */
}

#ifdef SOSEND_NO_LOAN
int use_sosend_loan = 0;
#else
int use_sosend_loan = 1;
#endif

struct mbuf *so_pendfree;

#ifndef SOMAXKVA
#define	SOMAXKVA (16 * 1024 * 1024)
#endif
int somaxkva = SOMAXKVA;
int socurkva;
int sokvawaiters;

#define	SOCK_LOAN_THRESH	4096
#define	SOCK_LOAN_CHUNK		65536

static size_t sodopendfree(struct socket *);

vaddr_t
sokvaalloc(vsize_t len, struct socket *so)
{
	vaddr_t lva;
	int s;

	while (socurkva + len > somaxkva) {
		if (sodopendfree(so))
			continue;
		SOSEND_COUNTER_INCR(&sosend_kvalimit);
		s = splvm();
		sokvawaiters++;
		(void) tsleep(&socurkva, PVM, "sokva", 0);
		sokvawaiters--;
		splx(s);
	}

	lva = uvm_km_valloc_wait(kernel_map, len);
	if (lva == 0)
		return (0);
	socurkva += len;

	return lva;
}

void
sokvafree(vaddr_t sva, vsize_t len)
{

	uvm_km_free(kernel_map, sva, len);
	socurkva -= len;
	if (sokvawaiters)
		wakeup(&socurkva);
}
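/*
 * A note on the accounting above: socurkva tracks the kernel virtual
 * address space currently consumed by loaned send buffers, capped at
 * somaxkva (16MB by default, overridable with options SOMAXKVA).
 * sokvaalloc() first tries to reclaim space by draining the
 * pending-free lists; only when nothing can be reclaimed does it
 * sleep on &socurkva, which is the wakeup channel sokvafree() (and
 * soloanfree() below) use once space is returned.
 */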
static void
sodoloanfree(struct vm_page **pgs, caddr_t buf, size_t size)
{
	vaddr_t va, sva, eva;
	vsize_t len;
	paddr_t pa;
	int i, npgs;

	eva = round_page((vaddr_t) buf + size);
	sva = trunc_page((vaddr_t) buf);
	len = eva - sva;
	npgs = len >> PAGE_SHIFT;

	if (__predict_false(pgs == NULL)) {
		pgs = alloca(npgs * sizeof(*pgs));

		for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) {
			if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
				panic("sodoloanfree: va 0x%lx not mapped", va);
			pgs[i] = PHYS_TO_VM_PAGE(pa);
		}
	}

	pmap_kremove(sva, len);
	pmap_update(pmap_kernel());
	uvm_unloan(pgs, npgs, UVM_LOAN_TOPAGE);
	sokvafree(sva, len);
}

static size_t
sodopendfree(struct socket *so)
{
	struct mbuf *m;
	size_t rv = 0;
	int s;

	s = splvm();

	for (;;) {
		m = so_pendfree;
		if (m == NULL)
			break;
		so_pendfree = m->m_next;
		splx(s);

		rv += m->m_ext.ext_size;
		sodoloanfree((m->m_flags & M_EXT_PAGES) ?
		    m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf,
		    m->m_ext.ext_size);
		s = splvm();
		pool_cache_put(&mbpool_cache, m);
	}

	for (;;) {
		m = so->so_pendfree;
		if (m == NULL)
			break;
		so->so_pendfree = m->m_next;
		splx(s);

		rv += m->m_ext.ext_size;
		sodoloanfree((m->m_flags & M_EXT_PAGES) ?
		    m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf,
		    m->m_ext.ext_size);
		s = splvm();
		pool_cache_put(&mbpool_cache, m);
	}

	splx(s);
	return (rv);
}

void
soloanfree(struct mbuf *m, caddr_t buf, size_t size, void *arg)
{
	struct socket *so = arg;
	int s;

	if (m == NULL) {
		sodoloanfree(NULL, buf, size);
		return;
	}

	s = splvm();
	m->m_next = so->so_pendfree;
	so->so_pendfree = m;
	splx(s);
	if (sokvawaiters)
		wakeup(&socurkva);
}

static long
sosend_loan(struct socket *so, struct uio *uio, struct mbuf *m, long space)
{
	struct iovec *iov = uio->uio_iov;
	vaddr_t sva, eva;
	vsize_t len;
	vaddr_t lva, va;
	int npgs, i, error;

	if (uio->uio_segflg != UIO_USERSPACE)
		return (0);

	if (iov->iov_len < (size_t) space)
		space = iov->iov_len;
	if (space > SOCK_LOAN_CHUNK)
		space = SOCK_LOAN_CHUNK;

	eva = round_page((vaddr_t) iov->iov_base + space);
	sva = trunc_page((vaddr_t) iov->iov_base);
	len = eva - sva;
	npgs = len >> PAGE_SHIFT;

	/* XXX KDASSERT */
	KASSERT(npgs <= M_EXT_MAXPAGES);

	lva = sokvaalloc(len, so);
	if (lva == 0)
		return 0;

	error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len,
	    m->m_ext.ext_pgs, UVM_LOAN_TOPAGE);
	if (error) {
		sokvafree(lva, len);
		return (0);
	}

	for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE)
		pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]),
		    VM_PROT_READ);
	pmap_update(pmap_kernel());

	lva += (vaddr_t) iov->iov_base & PAGE_MASK;

	MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so);
	m->m_flags |= M_EXT_PAGES | M_EXT_ROMAP;

	uio->uio_resid -= space;
	/* uio_offset not updated, not set/used for write(2) */
	uio->uio_iov->iov_base = (caddr_t) uio->uio_iov->iov_base + space;
	uio->uio_iov->iov_len -= space;
	if (uio->uio_iov->iov_len == 0) {
		uio->uio_iov++;
		uio->uio_iovcnt--;
	}

	return (space);
}
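/*
 * Lifecycle of a loaned send buffer, for reference: sosend() calls
 * sosend_loan(), which loans the user's pages into the kernel
 * (uvm_loan), maps them read-only at a fresh KVA range (sokvaalloc()
 * plus pmap_kenter_pa()), and attaches them to an external-storage
 * mbuf with MEXTADD, naming soloanfree as the free routine.  When
 * the mbuf is eventually freed, soloanfree() merely queues it on the
 * socket's so_pendfree list; the unmapping and uvm_unloan() work is
 * deferred to sodopendfree(), which runs in the context of a later
 * sosend(), soreceive() or sodisconnect() call.
 */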
/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
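/*
 * The dispatch convention, illustrated (every routine below follows
 * it): each operation hands off to the protocol through its usrreq
 * entry with a PRU_* request code, e.g. the body of sobind() is
 *
 *	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND,
 *	    (struct mbuf *)0, nam, (struct mbuf *)0, p);
 *
 * The three mbuf arguments carry, respectively, data, an address or
 * peer (occasionally a scalar cast to an mbuf pointer, as in
 * socreate() and soreceive()), and control information; unused slots
 * are passed as null pointers.
 */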
/*ARGSUSED*/
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p;
	struct protosw *prp;
	struct socket *so;
	int error, s;

	p = curproc;		/* XXX */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK);
	memset((caddr_t)so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	so->so_proto = prp;
	so->so_send = sosend;
	so->so_receive = soreceive;
#ifdef MBUFTRACE
	so->so_rcv.sb_mowner = &prp->pr_domain->dom_mowner;
	so->so_snd.sb_mowner = &prp->pr_domain->dom_mowner;
	so->so_mowner = &prp->pr_domain->dom_mowner;
#endif
	if (p != 0)
		so->so_uid = p->p_ucred->cr_uid;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
	    (struct mbuf *)(long)proto, (struct mbuf *)0, p);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
	splx(s);
	*aso = so;
	return (0);
}

int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	int s, error;

	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0,
	    nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}

int
solisten(struct socket *so, int backlog)
{
	int s, error;

	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_EMPTY(&so->so_q))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0)
		backlog = 0;
	so->so_qlimit = min(backlog, somaxconn);
	splx(s);
	return (0);
}

void
sofree(struct socket *so)
{
	struct mbuf *m;

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	while ((m = so->so_pendfree) != NULL) {
		so->so_pendfree = m->m_next;
		m->m_next = so_pendfree;
		so_pendfree = m;
	}
	pool_put(&socket_pool, so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so)
{
	struct socket *so2;
	int s, error;

	error = 0;
	s = splsoftnet();		/* conservative */
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != 0) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != 0) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
 drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
		    (struct proc *)0);
		if (error == 0)
			error = error2;
	}
 discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}
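/*
 * A note on the SO_LINGER path above: so_linger is in seconds, so
 * the tsleep() timeout is so_linger * hz clock ticks.  A
 * non-blocking socket that is already disconnecting returns from
 * close immediately; otherwise soclose() waits until SS_ISCONNECTED
 * clears, the linger timeout expires, or a signal arrives (PCATCH),
 * whichever comes first.
 */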
/*
 * Must be called at splsoftnet...
 */
int
soabort(struct socket *so)
{

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0,
	    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int s, error;

	error = 0;
	s = splsoftnet();
	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0);
	else
		error = ECONNABORTED;

	splx(s);
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	struct proc *p;
	int s, error;

	p = curproc;		/* XXX */
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0, p);
	splx(s);
	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int s, error;

	s = splsoftnet();
	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
	    (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0,
	    (struct proc *)0);
	splx(s);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int s, error;

	s = splsoftnet();
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
	    (struct proc *)0);
 bad:
	splx(s);
	sodopendfree(so);
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
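/*
 * SBLOCKWAIT() turns the caller's MSG_DONTWAIT into the sblock()
 * wait flag: with M_NOWAIT, sblock() fails immediately (EWOULDBLOCK)
 * rather than sleeping when another thread holds the sockbuf lock,
 * which is how non-blocking sends and receives avoid stalling in
 * sosend() and soreceive() below.
 */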
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
	struct mbuf *control, int flags)
{
	struct proc *p;
	struct mbuf **mp, *m;
	long space, len, resid, clen, mlen;
	int error, s, dontroute, atomic;

	sodopendfree(so);

	p = curproc;		/* XXX */
	clen = 0;
	atomic = sosendallatonce(so) || top;
	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

 restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (top == 0) {
					m = m_gethdr(M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					m = m_get(M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				MCLAIM(m, so->so_snd.sb_mowner);
				if (use_sosend_loan &&
				    uio->uio_iov->iov_len >= SOCK_LOAN_THRESH &&
				    space >= SOCK_LOAN_THRESH &&
				    (len = sosend_loan(so, uio, m,
				    space)) != 0) {
					SOSEND_COUNTER_INCR(&sosend_loan_big);
					space -= len;
					goto have_data;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					SOSEND_COUNTER_INCR(&sosend_copy_big);
					m_clget(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
					if (atomic && top == 0) {
						len = lmin(MCLBYTES - max_hdr,
						    resid);
						m->m_data += max_hdr;
					} else
						len = lmin(MCLBYTES, resid);
					space -= len;
				} else {
 nopages:
					SOSEND_COUNTER_INCR(&sosend_copy_small);
					len = lmin(lmin(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len,
				    uio);
 have_data:
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);

			s = splsoftnet();

			if (so->so_state & SS_CANTSENDMORE)
				snderr(EPIPE);

			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			if (resid > 0)
				so->so_state |= SS_MORETOCOME;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, p);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			if (resid > 0)
				so->so_state &= ~SS_MORETOCOME;
			splx(s);

			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

 release:
	sbunlock(&so->so_snd);
 out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
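/*
 * The inner loop above chooses among three ways of getting user data
 * into mbufs, matching the SOSEND_COUNTERS event counters: "loan big"
 * (zero-copy page loan via sosend_loan(), taken when both the current
 * iovec and the available buffer space reach SOCK_LOAN_THRESH), "copy
 * big" (copy into an mbuf cluster when at least MINCLSIZE bytes remain
 * and a cluster's worth of space is free), and "copy small" (copy into
 * the mbuf's internal storage).
 */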
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
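/*
 * Record layout in the receive buffer, for reference (records chain
 * through m_nextpkt, mbufs within a record through m_next):
 *
 *	sb_mb -> [MT_SONAME] -> [MT_CONTROL ...] -> [data ...]
 *	             |
 *	         m_nextpkt -> next record -> ...
 *
 * The address mbuf is present only for PR_ADDR protocols, and the
 * control mbufs only when ancillary data was appended.
 */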
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
	struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	struct mbuf *m, **mp;
	int flags, len, error, s, offset, moff, type, orig_resid;
	struct protosw *pr;
	struct mbuf *nextrecord;
	int mbuf_removed = 0;

	pr = so->so_proto;
	mp = mp0;
	type = 0;
	orig_resid = uio->uio_resid;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;

	if ((flags & MSG_DONTWAIT) == 0)
		sodopendfree(so);

	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
		    (struct proc *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
 bad:
		if (m)
			m_freem(m);
		return (error);
	}
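	/*
	 * Note that the MSG_OOB path above never touches the receive
	 * buffer: the protocol supplies the out-of-band data directly
	 * through PRU_RCVOOB into a scratch mbuf, with MSG_PEEK passed
	 * down (a scalar cast to an mbuf pointer, as elsewhere in this
	 * file) so the protocol knows whether to consume the data.
	 */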
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);

 restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
 dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * While we process the initial mbufs containing address and control
	 * info, we save a copy of m->m_nextpkt into nextrecord.
	 */
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			mbuf_removed = 1;
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			mbuf_removed = 1;
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	/*
	 * If m is non-NULL, we have some data to read.  From now on,
	 * make sure to keep sb_lastrecord consistent when working on
	 * the last packet on the chain (nextrecord == NULL) and we
	 * change m->m_nextpkt.
	 */
	if (m) {
		if ((flags & MSG_PEEK) == 0) {
			m->m_nextpkt = nextrecord;
			/*
			 * If nextrecord == NULL (this is a single chain),
			 * then sb_lastrecord may not be valid here if m
			 * was changed earlier.
			 */
			if (nextrecord == NULL) {
				KASSERT(so->so_rcv.sb_mb == m);
				so->so_rcv.sb_lastrecord = m;
			}
		}
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	} else {
		if ((flags & MSG_PEEK) == 0) {
			KASSERT(so->so_rcv.sb_mb == m);
			so->so_rcv.sb_mb = nextrecord;
			SB_EMPTY_FIXUP(&so->so_rcv);
		}
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (sb_mb points to the
		 * current mbuf, its m_nextpkt to the next record) when
		 * we drop priority; we must note any additions to the
		 * sockbuf when we block interrupts again.
		 */
		if (mp == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splsoftnet();
			if (error) {
				/*
				 * If any part of the record has been removed
				 * (such as the MT_SONAME mbuf, which will
				 * happen when PR_ADDR, and thus also
				 * PR_ATOMIC, is set), then drop the entire
				 * record to maintain the atomicity of the
				 * receive operation.
				 *
				 * This avoids a later panic("receive 1a")
				 * when compiled with DIAGNOSTIC.
				 */
				if (m && mbuf_removed
				    && (pr->pr_flags & PR_ATOMIC))
					(void) sbdroprecord(&so->so_rcv);

				goto release;
			}
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			/*
			 * If we are peeking and the socket receive buffer is
			 * full, stop since we can't get more data to peek at.
			 */
			if ((flags & MSG_PEEK) && sbspace(&so->so_rcv) <= 0)
				break;
			/*
			 * If we've drained the socket buffer, tell the
			 * protocol in case it needs to do something to
			 * get it filled again.
			 */
			if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb)
				(*pr->pr_usrreq)(so, PRU_RCVD,
				    (struct mbuf *)0,
				    (struct mbuf *)(long)flags,
				    (struct mbuf *)0,
				    (struct proc *)0);
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct proc *)0);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
 release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
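/*
 * Shut down one or both halves of a connection.  SHUT_RD (and
 * SHUT_RDWR) flushes the receive side locally via sorflush();
 * SHUT_WR (and SHUT_RDWR) is handed to the protocol as PRU_SHUTDOWN.
 */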
int
soshutdown(struct socket *so, int how)
{
	struct protosw *pr;

	pr = so->so_proto;
	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
		return (EINVAL);

	if (how == SHUT_RD || how == SHUT_RDWR)
		sorflush(so);
	if (how == SHUT_WR || how == SHUT_RDWR)
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
	return (0);
}

void
sorflush(struct socket *so)
{
	struct sockbuf *sb, asb;
	struct protosw *pr;
	int s;

	sb = &so->so_rcv;
	pr = so->so_proto;
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splnet();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	memset((caddr_t)sb, 0, sizeof(*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
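/*
 * sorflush() snapshots the receive buffer into a local copy (asb)
 * and zeroes the original before freeing anything, so the socket is
 * never seen with a half-torn-down buffer.  The dom_dispose hook
 * matters for PR_RIGHTS protocols (i.e. unix(4) sockets), whose
 * queued control mbufs may carry file descriptors in flight that
 * must be disposed of rather than simply freed.
 */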
int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
{
	int error;
	struct mbuf *m;

	error = 0;
	m = m0;
	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof(struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof(*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			if (tv->tv_sec > (SHRT_MAX - tv->tv_usec / tick) / hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;
			if (val == 0 && tv->tv_usec != 0)
				val = 1;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
 bad:
	if (m)
		(void) m_free(m);
	return (error);
}
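/*
 * Worked example of the SO_SNDTIMEO/SO_RCVTIMEO conversion above,
 * assuming the common hz = 100 (so tick, the clock tick length in
 * microseconds, is 10000): a timeout of { tv_sec = 1, tv_usec =
 * 500000 } becomes val = 1 * 100 + 500000 / 10000 = 150 clock ticks.
 * The EDOM check rejects timevals whose tick count would overflow
 * the short sb_timeo, and a nonzero timeout is rounded up to at
 * least one tick so it is not mistaken for "no timeout".
 */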
int
sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
{
	struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof(int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof(struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

void
sohasoutofband(struct socket *so)
{
	struct proc *p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}
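/*
 * kqueue(2) support.  Each sockbuf keeps a klist in its selinfo; the
 * detach routines below unhook a knote and clear SB_KNOTE once the
 * list drains.  filt_soread/filt_sowrite report the buffer's byte
 * counts in kn_data and fire at the low-water mark (or at the
 * caller-supplied threshold in kn_sdata when NOTE_LOWAT is set),
 * while listening sockets instead report the length of their
 * completed-connection queue.
 */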
static void
filt_sordetach(struct knote *kn)
{
	struct socket *so;

	so = (struct socket *)kn->kn_fp->f_data;
	SLIST_REMOVE(&so->so_rcv.sb_sel.sel_klist, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.sel_klist))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
}

/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so;

	so = (struct socket *)kn->kn_fp->f_data;
	kn->kn_data = so->so_rcv.sb_cc;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

static void
filt_sowdetach(struct knote *kn)
{
	struct socket *so;

	so = (struct socket *)kn->kn_fp->f_data;
	SLIST_REMOVE(&so->so_snd.sb_sel.sel_klist, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.sel_klist))
		so->so_snd.sb_flags &= ~SB_KNOTE;
}

/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so;

	so = (struct socket *)kn->kn_fp->f_data;
	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*ARGSUSED*/
static int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so;

	so = (struct socket *)kn->kn_fp->f_data;

	/*
	 * Set kn_data to number of incoming connections, not
	 * counting partial (incomplete) connections.
	 */
	kn->kn_data = so->so_qlen;
	return (kn->kn_data > 0);
}

static const struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
static const struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
static const struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };

int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so;
	struct sockbuf *sb;

	so = (struct socket *)kn->kn_fp->f_data;
	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (1);
	}
	SLIST_INSERT_HEAD(&sb->sb_sel.sel_klist, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;
	return (0);
}