/*	$NetBSD: uipc_socket2.c,v 1.126 2017/07/06 17:42:39 christos Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.2 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.126 2017/07/06 17:42:39 christos Exp $");

#ifdef _KERNEL_OPT
#include "opt_mbuftrace.h"
#include "opt_sb_max.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/pool.h>
#include <sys/uidinfo.h>

/*
 * Primitive routines for operating on sockets and socket buffers.
 *
 * Connection life-cycle:
 *
 *	Normal sequence from the active (originating) side:
 *
 *	- soisconnecting() is called during processing of connect() call,
 *	- resulting in an eventual call to soisconnected() if/when the
 *	  connection is established.
 *
 *	When the connection is torn down during processing of disconnect():
 *
 *	- soisdisconnecting() is called and,
 *	- soisdisconnected() is called when the connection to the peer
 *	  is totally severed.
 *
 *	The semantics of these routines are such that connectionless protocols
 *	can call soisconnected() and soisdisconnected() only, bypassing the
 *	in-progress calls when setting up a ``connection'' takes no time.
 *
 *	From the passive side, a socket is created with two queues of sockets:
 *
 *	- so_q0 (0) for partial connections (i.e. connections in progress)
 *	- so_q (1) for connections already made and awaiting user acceptance.
 *
 *	As a protocol is preparing incoming connections, it creates a socket
 *	structure queued on so_q0 by calling sonewconn().  When the connection
 *	is established, soisconnected() is called, and transfers the
 *	socket structure to so_q, making it available to accept().
 *
 *	If a socket is closed with sockets on either so_q0 or so_q, these
 *	sockets are dropped.
 *
 * Locking rules and assumptions:
 *
 * o socket::so_lock can change on the fly.  The low level routines used
 *   to lock sockets are aware of this.  When so_lock is acquired, the
 *   routine locking must check to see if so_lock still points to the
 *   lock that was acquired.  If so_lock has changed in the meantime, the
 *   now irrelevant lock that was acquired must be dropped and the lock
 *   operation retried.  Although not proven here, this is completely safe
 *   on a multiprocessor system, even with relaxed memory ordering, given
 *   the next two rules:
 *
 * o In order to mutate so_lock, the lock pointed to by the current value
 *   of so_lock must be held: i.e., the socket must be held locked by the
 *   changing thread.  The thread must issue membar_exit() to prevent
 *   memory accesses being reordered, and can set so_lock to the desired
 *   value.  If the lock pointed to by the new value of so_lock is not
 *   held by the changing thread, the socket must then be considered
 *   unlocked.
 *
 * o If so_lock is mutated, and the previous lock referred to by so_lock
 *   could still be visible to other threads in the system (e.g. via file
 *   descriptor or protocol-internal reference), then the old lock must
 *   remain valid until the socket and/or protocol control block has been
 *   torn down.
 *
 * o If a socket has a non-NULL so_head value (i.e. is in the process of
 *   connecting), then locking the socket must also lock the socket pointed
 *   to by so_head: their lock pointers must match.
 *
 * o If a socket has connections in progress (so_q, so_q0 not empty) then
 *   locking the socket must also lock the sockets attached to both queues.
 *   Again, their lock pointers must match.
 *
 * o Beyond the initial lock assignment in socreate(), assigning locks to
 *   sockets is the responsibility of the individual protocols / protocol
 *   domains.
 */
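
/*
 * Example (illustrative sketch only): the retry rule above, as a
 * hypothetical caller taking the socket lock directly would apply it.
 * solockretry() later in this file performs the same dance after a
 * sleep.
 *
 *	kmutex_t *lock = so->so_lock;
 *
 *	mutex_enter(lock);
 *	while (__predict_false(lock != so->so_lock)) {
 *		mutex_exit(lock);
 *		lock = so->so_lock;
 *		mutex_enter(lock);
 *	}
 */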

static pool_cache_t	socket_cache;
u_long		sb_max = SB_MAX;	/* maximum socket buffer size */
static u_long	sb_max_adj;		/* adjusted sb_max */

void
soisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

void
soisconnected(struct socket *so)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	KASSERT(head == NULL || solocked2(so, head));

	so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTED;
	if (head && so->so_onq == &head->so_q0) {
		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
			/*
			 * Re-enqueue and wake up any waiters, e.g.
			 * processes blocking on accept().
			 */
			soqremque(so, 0);
			soqinsque(head, so, 1);
			sorwakeup(head);
			cv_broadcast(&head->so_cv);
		} else {
			so->so_upcall =
			    head->so_accf->so_accept_filter->accf_callback;
			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
			so->so_rcv.sb_flags |= SB_UPCALL;
			so->so_options &= ~SO_ACCEPTFILTER;
			(*so->so_upcall)(so, so->so_upcallarg,
			    POLLIN|POLLRDNORM, M_DONTWAIT);
		}
	} else {
		cv_broadcast(&so->so_cv);
		sorwakeup(so);
		sowwakeup(so);
	}
}

void
soisdisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soisdisconnected(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soinit2(void)
{

	socket_cache = pool_cache_init(sizeof(struct socket), 0, 0, 0,
	    "socket", NULL, IPL_SOFTNET, NULL, NULL, NULL);
}
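
/*
 * Example (sketch only): how a hypothetical connection-oriented
 * protocol would issue the life-cycle calls above, with the socket
 * locked in each case.
 *
 *	In its connect request path:	soisconnecting(so);
 *	When the peer accepts:		soisconnected(so);
 *	When disconnect begins:		soisdisconnecting(so);
 *	When fully torn down:		soisdisconnected(so);
 */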

/*
 * sonewconn: accept a new connection.
 *
 * When an attempt at a new connection is noted on a socket which accepts
 * connections, sonewconn(9) is called.  If the connection is possible
 * (subject to space constraints, etc) then we allocate a new structure,
 * properly linked into the data structure of the original socket.
 *
 * => If 'soready' is true, then socket will become ready for accept() i.e.
 *    inserted into the so_q queue, SS_ISCONNECTED set and waiters awoken.
 * => May be called from soft-interrupt context.
 * => Listening socket should be locked.
 * => Returns the new socket locked.
 */
struct socket *
sonewconn(struct socket *head, bool soready)
{
	struct socket *so;
	int soqueue, error;

	KASSERT(solocked(head));

	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) {
		/*
		 * Listen queue overflow.  If there is an accept filter
		 * active, pass through the oldest connection it's handling.
		 */
		if (head->so_accf == NULL) {
			return NULL;
		} else {
			struct socket *so2, *next;

			/*
			 * Pass the oldest connection waiting in the
			 * accept filter.
			 */
			for (so2 = TAILQ_FIRST(&head->so_q0);
			     so2 != NULL; so2 = next) {
				next = TAILQ_NEXT(so2, so_qe);
				if (so2->so_upcall == NULL) {
					continue;
				}
				so2->so_upcall = NULL;
				so2->so_upcallarg = NULL;
				so2->so_options &= ~SO_ACCEPTFILTER;
				so2->so_rcv.sb_flags &= ~SB_UPCALL;
				soisconnected(so2);
				break;
			}

			/*
			 * If nothing was nudged out of the accept filter,
			 * bail out; otherwise proceed allocating the socket.
			 */
			if (so2 == NULL) {
				return NULL;
			}
		}
	}
	if ((head->so_options & SO_ACCEPTFILTER) != 0) {
		soready = false;
	}
	soqueue = soready ? 1 : 0;

	if ((so = soget(false)) == NULL) {
		return NULL;
	}
	so->so_type = head->so_type;
	so->so_options = head->so_options & ~SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uidinfo = head->so_uidinfo;
	so->so_cpid = head->so_cpid;

	/*
	 * Share the lock with the listening-socket, it may get unshared
	 * once the connection is complete.
	 */
	mutex_obj_hold(head->so_lock);
	so->so_lock = head->so_lock;

	/*
	 * Reserve the space for socket buffers.
	 */
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		goto out;
	}
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & (SB_AUTOSIZE | SB_ASYNC);
	so->so_snd.sb_flags |= head->so_snd.sb_flags & (SB_AUTOSIZE | SB_ASYNC);

	/*
	 * Finally, perform the protocol attach.  Note: a new socket
	 * lock may be assigned at this point (if so, it will be held).
	 */
	error = (*so->so_proto->pr_usrreqs->pr_attach)(so, 0);
	if (error) {
out:
		KASSERT(solocked(so));
		KASSERT(so->so_accf == NULL);
		soput(so);

		/* Note: the listening socket shall stay locked. */
		KASSERT(solocked(head));
		return NULL;
	}
	KASSERT(solocked2(head, so));

	/*
	 * Insert into the queue.  If ready, update the connection status
	 * and wake up any waiters, e.g. processes blocking on accept().
	 */
	soqinsque(head, so, soqueue);
	if (soready) {
		so->so_state |= SS_ISCONNECTED;
		sorwakeup(head);
		cv_broadcast(&head->so_cv);
	}
	return so;
}
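
/*
 * Example (hypothetical listener input path, sketch only): how a
 * protocol would use sonewconn() when a connection request arrives on
 * a locked listening socket "head".
 *
 *	struct socket *so2;
 *
 *	so2 = sonewconn(head, false);
 *	if (so2 == NULL)
 *		return;		(queue full or out of resources: drop)
 *	(... run the protocol handshake; when it completes ...)
 *	soisconnected(so2);
 */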

struct socket *
soget(bool waitok)
{
	struct socket *so;

	so = pool_cache_get(socket_cache,
	    (waitok ? PR_WAITOK : PR_NOWAIT));
	if (__predict_false(so == NULL))
		return (NULL);
	memset(so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	cv_init(&so->so_cv, "socket");
	cv_init(&so->so_rcv.sb_cv, "netio");
	cv_init(&so->so_snd.sb_cv, "netio");
	selinit(&so->so_rcv.sb_sel);
	selinit(&so->so_snd.sb_sel);
	so->so_rcv.sb_so = so;
	so->so_snd.sb_so = so;
	return so;
}

void
soput(struct socket *so)
{

	KASSERT(!cv_has_waiters(&so->so_cv));
	KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
	KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
	seldestroy(&so->so_rcv.sb_sel);
	seldestroy(&so->so_snd.sb_sel);
	mutex_obj_free(so->so_lock);
	cv_destroy(&so->so_cv);
	cv_destroy(&so->so_rcv.sb_cv);
	cv_destroy(&so->so_snd.sb_cv);
	pool_cache_put(socket_cache, so);
}

/*
 * soqinsque: insert socket of a new connection into the specified
 * accept queue of the listening socket (head).
 *
 *	q = 0: queue of partial connections
 *	q = 1: queue of incoming connections
 */
void
soqinsque(struct socket *head, struct socket *so, int q)
{
	KASSERT(q == 0 || q == 1);
	KASSERT(solocked2(head, so));
	KASSERT(so->so_onq == NULL);
	KASSERT(so->so_head == NULL);

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

/*
 * soqremque: remove socket from the specified queue.
 *
 * => Returns true if socket was removed from the specified queue.
 * => False if socket was not removed (because it was in other queue).
 */
bool
soqremque(struct socket *so, int q)
{
	struct socket *head = so->so_head;

	KASSERT(q == 0 || q == 1);
	KASSERT(solocked(so));
	KASSERT(so->so_onq != NULL);
	KASSERT(head != NULL);

	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return false;
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return false;
		head->so_qlen--;
	}
	KASSERT(solocked2(so, head));
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return true;
}

/*
 * socantsendmore: indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (e.g. in pr_shutdown()).
 */
void
socantsendmore(struct socket *so)
{
	KASSERT(solocked(so));

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

/*
 * socantrcvmore(): indicates that no more data will be received and
 * will normally be applied to the socket by a protocol when it detects
 * that the peer will send no more data.  Data queued for reading in
 * the socket may yet be read.
 */
void
socantrcvmore(struct socket *so)
{
	KASSERT(solocked(so));

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}
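
/*
 * Example (sketch only): a hypothetical protocol applying the above
 * when the peer signals end-of-data (e.g. on receipt of a FIN-like
 * message), socket locked:
 *
 *	socantrcvmore(so);	(queued data may still be read)
 *
 * and, when the local side shuts down sending:
 *
 *	socantsendmore(so);
 */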

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(struct sockbuf *sb)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	so = sb->sb_so;

	KASSERT(solocked(so));

	sb->sb_flags |= SB_NOTIFY;
	lock = so->so_lock;
	if ((sb->sb_flags & SB_NOINTR) != 0)
		error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
	else
		error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
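
/*
 * Example (sketch only): a receive path waiting for data with the
 * socket locked; sbwait() returns non-zero if the wait was
 * interrupted or timed out.
 *
 *	int error;
 *
 *	while (so->so_rcv.sb_cc == 0) {
 *		if (so->so_state & SS_CANTRCVMORE)
 *			break;
 *		if ((error = sbwait(&so->so_rcv)) != 0)
 *			return error;
 *	}
 */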

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket buffer has the SB_ASYNC flag set.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb, int code)
{
	int band;

	KASSERT(solocked(so));
	KASSERT(sb->sb_so == so);

	if (code == POLL_IN)
		band = POLLIN|POLLRDNORM;
	else
		band = POLLOUT|POLLWRNORM;
	sb->sb_flags &= ~SB_NOTIFY;
	selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
	cv_broadcast(&sb->sb_cv);
	if (sb->sb_flags & SB_ASYNC)
		fownsignal(so->so_pgid, SIGIO, code, band, so);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, band, M_DONTWAIT);
}

/*
 * Reset a socket's lock pointer.  Wake all threads waiting on the
 * socket's condition variables so that they can restart their waits
 * using the new lock.  The existing lock must be held.
 */
void
solockreset(struct socket *so, kmutex_t *lock)
{

	KASSERT(solocked(so));

	so->so_lock = lock;
	cv_broadcast(&so->so_snd.sb_cv);
	cv_broadcast(&so->so_rcv.sb_cv);
	cv_broadcast(&so->so_cv);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The
 * upper level routine soreceive() expects the following conventions to
 * be observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

int
sb_max_set(u_long new_sbmax)
{
	int s;

	if (new_sbmax < (16 * 1024))
		return (EINVAL);

	s = splsoftnet();
	sb_max = new_sbmax;
	sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
	splx(s);

	return (0);
}

int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{
	KASSERT(so->so_pcb == NULL || solocked(so));

	/*
	 * There's at least one application (the configure script of
	 * screen) which expects a fifo to be writable even if it has
	 * "some" bytes in its buffer.
	 * So we want to make sure (hiwat - lowat) >= (some bytes).
	 *
	 * PIPE_BUF here is an arbitrary value chosen as (some bytes) above.
	 * We expect it's large enough for such applications.
	 */
	u_long lowat = MAX(sock_loan_thresh, MCLBYTES);
	u_long hiwat = lowat + PIPE_BUF;

	if (sndcc < hiwat)
		sndcc = hiwat;
	if (sbreserve(&so->so_snd, sndcc, so) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = lowat;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
	sbrelease(&so->so_snd, so);
bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
{
	struct lwp *l = curlwp; /* XXX */
	rlim_t maxcc;
	struct uidinfo *uidinfo;

	KASSERT(so->so_pcb == NULL || solocked(so));
	KASSERT(sb->sb_so == so);
	KASSERT(sb_max_adj != 0);

	if (cc == 0 || cc > sb_max_adj)
		return (0);

	maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;

	uidinfo = so->so_uidinfo;
	if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
		return 0;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.  We do not assert
 * that the socket is held locked here: see sorflush().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	KASSERT(sb->sb_so == so);

	sbflush(sb);
	(void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
	sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer, and
 * then removed from the socket buffer with sbdrop() or sbdroprecord()
 * when the data is acknowledged by the peer.
 */
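
/*
 * Example (hypothetical datagram input path, sketch only): the
 * check-then-append convention described above, socket locked.
 *
 *	if (sbspace(&so->so_rcv) < m->m_pkthdr.len) {
 *		m_freem(m);		(no room: drop the packet)
 *		return;
 *	}
 *	sbappendrecord(&so->so_rcv, m);
 *	sorwakeup(so);
 */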

#ifdef SOCKBUF_DEBUG
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link a chain of records onto a socket buffer
 */
#define	SBLINKRECORDCHAIN(sb, m0, mlast)				\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (mlast);					\
} while (/*CONSTCOND*/0)


#define	SBLINKRECORD(sb, m0)						\
    SBLINKRECORDCHAIN(sb, m0, m0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	if (m == NULL)
		return;

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KASSERT(solocked(sb->sb_so));
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}
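
/*
 * Example (sketch only): a stream protocol appending in-sequence data
 * it has just received, socket locked and space already checked
 * against sbspace().
 *
 *	sbappendstream(&so->so_rcv, m);
 *	sorwakeup(so);
 */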

#ifdef SOCKBUF_DEBUG
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m, *m2;
	u_long len, mbcnt;

	KASSERT(solocked(sb->sb_so));

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_nextpkt) {
		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
			len += m2->m_len;
			mbcnt += MSIZE;
			if (m2->m_flags & M_EXT)
				mbcnt += m2->m_ext.ext_size;
			if (m2->m_nextpkt != NULL)
				panic("sbcheck nextpkt");
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	KASSERT(solocked(sb->sb_so));

	if (m0 == NULL)
		return;

#ifdef MBUFTRACE
	m_claimm(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	KASSERT(solocked(sb->sb_so));

	if (m0 == NULL)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
	struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	KASSERT(solocked(sb->sb_so));

	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claimm(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == NULL)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	m = m_get(M_DONTWAIT, MT_SONAME);
	if (m == NULL)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, void *), asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");
	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}
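
/*
 * Example (hypothetical datagram protocol, sketch only): delivering a
 * packet together with the sender's address, socket locked; "from" is
 * a sockaddr filled in by the protocol.
 *
 *	if (sbappendaddr(&so->so_rcv, (const struct sockaddr *)&from,
 *	    m, NULL) == 0) {
 *		m_freem(m);		(no space or no mbufs: drop)
 *		return;
 *	}
 *	sorwakeup(so);
 */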

/*
 * Helper for sbappendchainaddr: prepend a struct sockaddr* to
 * an mbuf chain.
 */
static inline struct mbuf *
m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
	const struct sockaddr *asa)
{
	struct mbuf *m;
	const int salen = asa->sa_len;

	KASSERT(solocked(sb->sb_so));

	/* only the first in each chain need be a pkthdr */
	m = m_gethdr(M_DONTWAIT, MT_SONAME);
	if (m == NULL)
		return NULL;
	MCLAIM(m, sb->sb_mowner);
#ifdef notyet
	if (salen > MHLEN) {
		MEXTMALLOC(m, salen, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
#else
	KASSERT(salen <= MHLEN);
#endif
	m->m_len = salen;
	memcpy(mtod(m, void *), asa, salen);
	m->m_next = m0;
	m->m_pkthdr.len = salen + m0->m_pkthdr.len;

	return m;
}

int
sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
	struct mbuf *m0, int sbprio)
{
	struct mbuf *m, *n, *n0, *nlast;
	int error;

	KASSERT(solocked(sb->sb_so));

	/*
	 * XXX sbprio reserved for encoding priority of this request:
	 *  SB_PRIO_NONE --> honour normal sb limits
	 *  SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
	 *	take whole chain.  Intended for large requests
	 *	that should be delivered atomically (all, or none).
	 *  SB_PRIO_OVERDRAFT --> allow a small (2*MLEN) overflow
	 *	over normal socket limits, for messages indicating
	 *	buffer overflow in earlier normal/lower-priority messages
	 *  SB_PRIO_BESTEFFORT --> ignore limits entirely.
	 *	Intended for kernel-generated messages only.
	 *	Up to generator to avoid total mbuf resource exhaustion.
	 */
	(void)sbprio;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrchain");

#ifdef notyet
	space = sbspace(sb);

	/*
	 * Enforce SB_PRIO_* limits as described above.
	 */
#endif

	n0 = NULL;
	nlast = NULL;
	for (m = m0; m; m = m->m_nextpkt) {
		struct mbuf *np;

#ifdef MBUFTRACE
		m_claimm(m, sb->sb_mowner);
#endif

		/* Prepend sockaddr to this record (m) of input chain m0 */
		n = m_prepend_sockaddr(sb, m, asa);
		if (n == NULL) {
			error = ENOBUFS;
			goto bad;
		}

		/* Append record (asa+m) to end of new chain n0 */
		if (n0 == NULL) {
			n0 = n;
		} else {
			nlast->m_nextpkt = n;
		}
		/* Keep track of last record on new chain */
		nlast = n;

		for (np = n; np; np = np->m_next)
			sballoc(sb, np);
	}

	SBLASTRECORDCHK(sb, "sbappendaddrchain 1");

	/* Drop the entire chain of (asa+m) records onto the socket */
	SBLINKRECORDCHAIN(sb, n0, nlast);

	SBLASTRECORDCHK(sb, "sbappendaddrchain 2");

	for (m = nlast; m->m_next; m = m->m_next)
		;
	sb->sb_mbtail = m;
	SBLASTMBUFCHK(sb, "sbappendaddrchain");

	return (1);

bad:
	/*
	 * On error, free the prepended addresses.  For consistency
	 * with sbappendaddr(), leave it to our caller to free
	 * the input record chain passed to us as m0.
	 */
	while ((n = n0) != NULL) {
		struct mbuf *np;

		/* Undo the sballoc() of this record */
		for (np = n; np; np = np->m_next)
			sbfree(sb, np);

		n0 = n->m_nextpkt;	/* iterate at next prepended address */
		np = m_free(n);		/* free prepended address (not data) */
	}
	return error;
}


int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	KASSERT(solocked(sb->sb_so));

	space = 0;
	if (control == NULL)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == NULL)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;		/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");
	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	KASSERT(solocked(sb->sb_so));

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT(solocked(sb->sb_so));
	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *next;

	KASSERT(solocked(sb->sb_so));

	next = (m = sb->sb_mb) ? m->m_nextpkt : NULL;
	while (len > 0) {
		if (m == NULL) {
			if (next == NULL)
				panic("sbdrop(%p,%d): cc=%lu",
				    sb, len, sb->sb_cc);
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		m = m_free(m);
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		m = m_free(m);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	KASSERT(solocked(sb->sb_so));

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			mn = m_free(m);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}
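
/*
 * Example (sketch only): a reliable protocol releasing data from its
 * send buffer once the peer acknowledges it, socket locked; "acked"
 * is the hypothetical count of newly acknowledged bytes.
 *
 *	sbdrop(&so->so_snd, acked);
 *	sowwakeup(so);
 */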

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol1(void **p, int size, int type, int level, int flags)
{
	struct cmsghdr *cp;
	struct mbuf *m;
	int space = CMSG_SPACE(size);

	if ((flags & M_DONTWAIT) && space > MCLBYTES) {
		printf("%s: message too large %d\n", __func__, space);
		return NULL;
	}

	if ((m = m_get(flags, MT_CONTROL)) == NULL)
		return NULL;
	if (space > MLEN) {
		if (space > MCLBYTES)
			MEXTMALLOC(m, space, M_WAITOK);
		else
			MCLGET(m, flags);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
	cp = mtod(m, struct cmsghdr *);
	*p = CMSG_DATA(cp);
	m->m_len = space;
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return m;
}

struct mbuf *
sbcreatecontrol(void *p, int size, int type, int level)
{
	struct mbuf *m;
	void *v;

	m = sbcreatecontrol1(&v, size, type, level, M_DONTWAIT);
	if (m == NULL)
		return NULL;
	memcpy(v, p, size);
	return m;
}

void
solockretry(struct socket *so, kmutex_t *lock)
{

	while (lock != so->so_lock) {
		mutex_exit(lock);
		lock = so->so_lock;
		mutex_enter(lock);
	}
}

bool
solocked(struct socket *so)
{

	return mutex_owned(so->so_lock);
}

bool
solocked2(struct socket *so1, struct socket *so2)
{
	kmutex_t *lock;

	lock = so1->so_lock;
	if (lock != so2->so_lock)
		return false;
	return mutex_owned(lock);
}

/*
 * sosetlock: assign a default lock to a new socket.
 */
void
sosetlock(struct socket *so)
{
	if (so->so_lock == NULL) {
		kmutex_t *lock = softnet_lock;

		so->so_lock = lock;
		mutex_obj_hold(lock);
		mutex_enter(lock);
	}
	KASSERT(solocked(so));
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	KASSERT(solocked(sb->sb_so));

	for (;;) {
		if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
			sb->sb_flags |= SB_LOCK;
			return 0;
		}
		if (wf != M_WAITOK)
			return EWOULDBLOCK;
		so = sb->sb_so;
		lock = so->so_lock;
		if ((sb->sb_flags & SB_NOINTR) != 0) {
			cv_wait(&so->so_cv, lock);
			error = 0;
		} else
			error = cv_wait_sig(&so->so_cv, lock);
		if (__predict_false(lock != so->so_lock))
			solockretry(so, lock);
		if (error != 0)
			return error;
	}
}

void
sbunlock(struct sockbuf *sb)
{
	struct socket *so;

	so = sb->sb_so;

	KASSERT(solocked(so));
	KASSERT((sb->sb_flags & SB_LOCK) != 0);

	sb->sb_flags &= ~SB_LOCK;
	cv_broadcast(&so->so_cv);
}

int
sowait(struct socket *so, bool catch_p, int timo)
{
	kmutex_t *lock;
	int error;

	KASSERT(solocked(so));
	KASSERT(catch_p || timo != 0);

	lock = so->so_lock;
	if (catch_p)
		error = cv_timedwait_sig(&so->so_cv, lock, timo);
	else
		error = cv_timedwait(&so->so_cv, lock, timo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
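
/*
 * Example (sketch only): serializing a receive operation with the
 * sockbuf sleep lock above, socket locked on entry.
 *
 *	int error;
 *
 *	if ((error = sblock(&so->so_rcv, M_WAITOK)) != 0)
 *		return error;
 *	(... consume data from so->so_rcv ...)
 *	sbunlock(&so->so_rcv);
 */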