/*	$NetBSD: uipc_socket2.c,v 1.122 2015/08/24 22:21:26 pooka Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.2 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.122 2015/08/24 22:21:26 pooka Exp $");

#ifdef _KERNEL_OPT
#include "opt_mbuftrace.h"
#include "opt_sb_max.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/pool.h>
#include <sys/uidinfo.h>

/*
 * Primitive routines for operating on sockets and socket buffers.
 *
 * Connection life-cycle:
 *
 *	Normal sequence from the active (originating) side:
 *
 *	- soisconnecting() is called during processing of connect() call,
 *	- resulting in an eventual call to soisconnected() if/when the
 *	  connection is established.
 *
 *	When the connection is torn down during processing of disconnect():
 *
 *	- soisdisconnecting() is called and,
 *	- soisdisconnected() is called when the connection to the peer
 *	  is totally severed.
 *
 *	The semantics of these routines are such that connectionless protocols
 *	can call soisconnected() and soisdisconnected() only, bypassing the
 *	in-progress calls when setting up a ``connection'' takes no time.
 *
 *	From the passive side, a socket is created with two queues of sockets:
 *
 *	- so_q0 (0) for partial connections (i.e. connections in progress)
 *	- so_q (1) for connections already made and awaiting user acceptance.
 *
 *	As a protocol is preparing incoming connections, it creates a socket
 *	structure queued on so_q0 by calling sonewconn().  When the connection
 *	is established, soisconnected() is called, and transfers the
 *	socket structure to so_q, making it available to accept().
 *
 *	If a socket is closed with sockets on either so_q0 or so_q, these
 *	sockets are dropped.
 *
 * Locking rules and assumptions:
 *
 * o socket::so_lock can change on the fly.  The low level routines used
 *   to lock sockets are aware of this.  When so_lock is acquired, the
 *   routine locking must check to see if so_lock still points to the
 *   lock that was acquired.  If so_lock has changed in the meantime, the
 *   now irrelevant lock that was acquired must be dropped and the lock
 *   operation retried.  Although not proven here, this is completely safe
 *   on a multiprocessor system, even with relaxed memory ordering, given
 *   the next two rules:
 *
 * o In order to mutate so_lock, the lock pointed to by the current value
 *   of so_lock must be held: i.e., the socket must be held locked by the
 *   changing thread.  The thread must issue membar_exit() to prevent
 *   memory accesses being reordered, and can set so_lock to the desired
 *   value.  If the lock pointed to by the new value of so_lock is not
 *   held by the changing thread, the socket must then be considered
 *   unlocked.
 *
 * o If so_lock is mutated, and the previous lock referred to by so_lock
 *   could still be visible to other threads in the system (e.g. via file
 *   descriptor or protocol-internal reference), then the old lock must
 *   remain valid until the socket and/or protocol control block has been
 *   torn down.
 *
 * o If a socket has a non-NULL so_head value (i.e. is in the process of
 *   connecting), then locking the socket must also lock the socket pointed
 *   to by so_head: their lock pointers must match.
 *
 * o If a socket has connections in progress (so_q, so_q0 not empty) then
 *   locking the socket must also lock the sockets attached to both queues.
 *   Again, their lock pointers must match.
 *
 * o Beyond the initial lock assignment in socreate(), assigning locks to
 *   sockets is the responsibility of the individual protocols / protocol
 *   domains.
 */

static pool_cache_t	socket_cache;
u_long	sb_max = SB_MAX;	/* maximum socket buffer size */
static u_long	sb_max_adj;	/* adjusted sb_max */

void
soisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

void
soisconnected(struct socket *so)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	KASSERT(head == NULL || solocked2(so, head));

	so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTED;
	if (head && so->so_onq == &head->so_q0) {
		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
			/*
			 * Re-enqueue and wake up any waiters, e.g.
			 * processes blocking on accept().
			 */
			soqremque(so, 0);
			soqinsque(head, so, 1);
			sorwakeup(head);
			cv_broadcast(&head->so_cv);
		} else {
			so->so_upcall =
			    head->so_accf->so_accept_filter->accf_callback;
			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
			so->so_rcv.sb_flags |= SB_UPCALL;
			so->so_options &= ~SO_ACCEPTFILTER;
			(*so->so_upcall)(so, so->so_upcallarg,
			    POLLIN|POLLRDNORM, M_DONTWAIT);
		}
	} else {
		cv_broadcast(&so->so_cv);
		sorwakeup(so);
		sowwakeup(so);
	}
}

void
soisdisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soisdisconnected(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soinit2(void)
{

	socket_cache = pool_cache_init(sizeof(struct socket), 0, 0, 0,
	    "socket", NULL, IPL_SOFTNET, NULL, NULL, NULL);
}

/*
 * sonewconn: accept a new connection.
 *
 * When an attempt at a new connection is noted on a socket which accepts
 * connections, sonewconn(9) is called.  If the connection is possible
 * (subject to space constraints, etc) then we allocate a new structure,
 * properly linked into the data structure of the original socket.
 *
 * => If 'soready' is true, then socket will become ready for accept() i.e.
 *    inserted into the so_q queue, SS_ISCONNECTED set and waiters awoken.
 * => May be called from soft-interrupt context.
 * => Listening socket should be locked.
 * => Returns the new socket locked.
 */
struct socket *
sonewconn(struct socket *head, bool soready)
{
	struct socket *so;
	int soqueue, error;

	KASSERT(solocked(head));

	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) {
		/* Listen queue overflow. */
		return NULL;
	}
	if ((head->so_options & SO_ACCEPTFILTER) != 0) {
		soready = false;
	}
	soqueue = soready ? 1 : 0;

	if ((so = soget(false)) == NULL) {
		return NULL;
	}
	so->so_type = head->so_type;
	so->so_options = head->so_options & ~SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uidinfo = head->so_uidinfo;
	so->so_cpid = head->so_cpid;

	/*
	 * Share the lock with the listening-socket, it may get unshared
	 * once the connection is complete.
	 */
	mutex_obj_hold(head->so_lock);
	so->so_lock = head->so_lock;

	/*
	 * Reserve the space for socket buffers.
	 */
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		goto out;
	}
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & (SB_AUTOSIZE | SB_ASYNC);
	so->so_snd.sb_flags |= head->so_snd.sb_flags & (SB_AUTOSIZE | SB_ASYNC);

	/*
	 * Finally, perform the protocol attach.  Note: a new socket
	 * lock may be assigned at this point (if so, it will be held).
	 */
	error = (*so->so_proto->pr_usrreqs->pr_attach)(so, 0);
	if (error) {
 out:
		KASSERT(solocked(so));
		KASSERT(so->so_accf == NULL);
		soput(so);

		/* Note: the listening socket shall stay locked. */
		KASSERT(solocked(head));
		return NULL;
	}
	KASSERT(solocked2(head, so));

	/*
	 * Insert into the queue.  If ready, update the connection status
	 * and wake up any waiters, e.g. processes blocking on accept().
	 */
	soqinsque(head, so, soqueue);
	if (soready) {
		so->so_state |= SS_ISCONNECTED;
		sorwakeup(head);
		cv_broadcast(&head->so_cv);
	}
	return so;
}

struct socket *
soget(bool waitok)
{
	struct socket *so;

	so = pool_cache_get(socket_cache, (waitok ? PR_WAITOK : PR_NOWAIT));
	if (__predict_false(so == NULL))
		return (NULL);
	memset(so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	cv_init(&so->so_cv, "socket");
	cv_init(&so->so_rcv.sb_cv, "netio");
	cv_init(&so->so_snd.sb_cv, "netio");
	selinit(&so->so_rcv.sb_sel);
	selinit(&so->so_snd.sb_sel);
	so->so_rcv.sb_so = so;
	so->so_snd.sb_so = so;
	return so;
}

void
soput(struct socket *so)
{

	KASSERT(!cv_has_waiters(&so->so_cv));
	KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
	KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
	seldestroy(&so->so_rcv.sb_sel);
	seldestroy(&so->so_snd.sb_sel);
	mutex_obj_free(so->so_lock);
	cv_destroy(&so->so_cv);
	cv_destroy(&so->so_rcv.sb_cv);
	cv_destroy(&so->so_snd.sb_cv);
	pool_cache_put(socket_cache, so);
}
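
/*
 * Illustrative sketch (kept out of the build with #if 0) of how a
 * protocol is expected to drive the passive-open part of the connection
 * life-cycle described at the top of this file.  The function name and
 * its structure are hypothetical; only sonewconn(), soisconnected() and
 * the locking assertion are the real KPIs.
 */
#if 0
static void
example_passive_open(struct socket *head)
{
	struct socket *so;

	KASSERT(solocked(head));

	/* Queue an embryonic connection on the listener's so_q0 ... */
	so = sonewconn(head, false);
	if (so == NULL)
		return;		/* listen queue full or out of memory */

	/*
	 * ... and later, once the protocol handshake completes, move it
	 * to so_q and wake anyone blocked in accept().
	 */
	soisconnected(so);
}
#endif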
/*
 * soqinsque: insert socket of a new connection into the specified
 * accept queue of the listening socket (head).
 *
 * q = 0: queue of partial connections
 * q = 1: queue of incoming connections
 */
void
soqinsque(struct socket *head, struct socket *so, int q)
{
	KASSERT(q == 0 || q == 1);
	KASSERT(solocked2(head, so));
	KASSERT(so->so_onq == NULL);
	KASSERT(so->so_head == NULL);

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

/*
 * soqremque: remove socket from the specified queue.
 *
 * => Returns true if socket was removed from the specified queue.
 * => False if socket was not removed (because it was in other queue).
 */
bool
soqremque(struct socket *so, int q)
{
	struct socket *head = so->so_head;

	KASSERT(q == 0 || q == 1);
	KASSERT(solocked(so));
	KASSERT(so->so_onq != NULL);
	KASSERT(head != NULL);

	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return false;
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return false;
		head->so_qlen--;
	}
	KASSERT(solocked2(so, head));
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return true;
}

/*
 * socantsendmore: indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, e.g. by the
 * protocol code handling pr_shutdown().
 */
void
socantsendmore(struct socket *so)
{
	KASSERT(solocked(so));

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

/*
 * socantrcvmore(): indicates that no more data will be received and
 * will normally be applied to the socket by a protocol when it detects
 * that the peer will send no more data.  Data queued for reading in
 * the socket may yet be read.
 */
void
socantrcvmore(struct socket *so)
{
	KASSERT(solocked(so));

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(struct sockbuf *sb)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	so = sb->sb_so;

	KASSERT(solocked(so));

	sb->sb_flags |= SB_NOTIFY;
	lock = so->so_lock;
	if ((sb->sb_flags & SB_NOINTR) != 0)
		error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
	else
		error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket buffer has the SB_ASYNC flag set.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb, int code)
{
	int band;

	KASSERT(solocked(so));
	KASSERT(sb->sb_so == so);

	if (code == POLL_IN)
		band = POLLIN|POLLRDNORM;
	else
		band = POLLOUT|POLLWRNORM;
	sb->sb_flags &= ~SB_NOTIFY;
	selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
	cv_broadcast(&sb->sb_cv);
	if (sb->sb_flags & SB_ASYNC)
		fownsignal(so->so_pgid, SIGIO, code, band, so);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, band, M_DONTWAIT);
}
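
/*
 * Illustrative sketch (not compiled) of the usual sbwait() pattern: a
 * receiver holding the socket lock sleeps until data arrives or the
 * wait is interrupted; sowakeup() (via sorwakeup()) is what ends the
 * sleep.  The surrounding function is hypothetical.
 */
#if 0
static int
example_wait_for_data(struct socket *so)
{
	int error = 0;

	KASSERT(solocked(so));

	/* Sleep until there is data to read or the peer shuts down. */
	while (so->so_rcv.sb_cc == 0 &&
	    (so->so_state & SS_CANTRCVMORE) == 0) {
		error = sbwait(&so->so_rcv);
		if (error != 0)
			break;
	}
	return error;
}
#endif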
/*
 * Reset a socket's lock pointer.  Wake all threads waiting on the
 * socket's condition variables so that they can restart their waits
 * using the new lock.  The existing lock must be held.
 */
void
solockreset(struct socket *so, kmutex_t *lock)
{

	KASSERT(solocked(so));

	so->so_lock = lock;
	cv_broadcast(&so->so_snd.sb_cv);
	cv_broadcast(&so->so_rcv.sb_cv);
	cv_broadcast(&so->so_cv);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

int
sb_max_set(u_long new_sbmax)
{
	int s;

	if (new_sbmax < (16 * 1024))
		return (EINVAL);

	s = splsoftnet();
	sb_max = new_sbmax;
	sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
	splx(s);

	return (0);
}

int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{
	KASSERT(so->so_pcb == NULL || solocked(so));

	/*
	 * there's at least one application (the configure script of screen)
	 * which expects a fifo to be writable even if it has "some" bytes
	 * in its buffer.
	 * so we want to make sure (hiwat - lowat) >= (some bytes).
	 *
	 * PIPE_BUF here is an arbitrary value chosen as (some bytes) above.
	 * we expect it's large enough for such applications.
	 */
	u_long lowat = MAX(sock_loan_thresh, MCLBYTES);
	u_long hiwat = lowat + PIPE_BUF;

	if (sndcc < hiwat)
		sndcc = hiwat;
	if (sbreserve(&so->so_snd, sndcc, so) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = lowat;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
 bad2:
	sbrelease(&so->so_snd, so);
 bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
{
	struct lwp *l = curlwp; /* XXX */
	rlim_t maxcc;
	struct uidinfo *uidinfo;

	KASSERT(so->so_pcb == NULL || solocked(so));
	KASSERT(sb->sb_so == so);
	KASSERT(sb_max_adj != 0);

	if (cc == 0 || cc > sb_max_adj)
		return (0);

	maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;

	uidinfo = so->so_uidinfo;
	if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
		return 0;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.  We do not assert
 * that the socket is held locked here: see sorflush().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	KASSERT(sb->sb_so == so);

	sbflush(sb);
	(void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
	sb->sb_mbmax = 0;
}
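
/*
 * Illustrative sketch (not compiled): a protocol's pr_attach routine
 * typically sizes the socket buffers with soreserve() before anything
 * can be queued on the socket.  The function name and the buffer sizes
 * below are hypothetical; sosetlock() and soreserve() are the real KPIs.
 */
#if 0
static int
example_attach(struct socket *so)
{
	int error;

	sosetlock(so);
	error = soreserve(so, 8192 /* send */, 8192 /* receive */);
	if (error != 0)
		return error;
	/* ... allocate and link the protocol control block here ... */
	return 0;
}
#endif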
/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

#ifdef SOCKBUF_DEBUG
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link a chain of records onto a socket buffer
 */
#define	SBLINKRECORDCHAIN(sb, m0, mlast)				\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (mlast);					\
} while (/*CONSTCOND*/0)


#define	SBLINKRECORD(sb, m0)						\
    SBLINKRECORDCHAIN(sb, m0, m0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	if (m == NULL)
		return;

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KASSERT(solocked(sb->sb_so));
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m, *m2;
	u_long len, mbcnt;

	KASSERT(solocked(sb->sb_so));

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_nextpkt) {
		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
			len += m2->m_len;
			mbcnt += MSIZE;
			if (m2->m_flags & M_EXT)
				mbcnt += m2->m_ext.ext_size;
			if (m2->m_nextpkt != NULL)
				panic("sbcheck nextpkt");
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	KASSERT(solocked(sb->sb_so));

	if (m0 == NULL)
		return;

#ifdef MBUFTRACE
	m_claimm(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	KASSERT(solocked(sb->sb_so));

	if (m0 == NULL)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}
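
/*
 * Illustrative sketch (not compiled) of the convention described in the
 * block comment above the append routines: callers of sbappend() and
 * sbappendstream() check sbspace() themselves and wake the receiver
 * afterwards.  The function name is hypothetical, and m is assumed to
 * carry a packet header.
 */
#if 0
static void
example_deliver_stream(struct socket *so, struct mbuf *m)
{
	KASSERT(solocked(so));

	if (sbspace(&so->so_rcv) < m->m_pkthdr.len) {
		m_freem(m);		/* no room: drop the data */
		return;
	}
	sbappendstream(&so->so_rcv, m);
	sorwakeup(so);
}
#endif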
/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
    struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	KASSERT(solocked(sb->sb_so));

	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claimm(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == NULL)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	m = m_get(M_DONTWAIT, MT_SONAME);
	if (m == NULL)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, void *), asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");
	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}
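
/*
 * Illustrative sketch (not compiled): datagram protocols deliver a
 * packet together with the sender's address via sbappendaddr(), which
 * performs its own space check and returns 0 on failure.  The function
 * and variable names here are hypothetical.
 */
#if 0
static void
example_deliver_datagram(struct socket *so, const struct sockaddr *from,
    struct mbuf *m, struct mbuf *control)
{
	KASSERT(solocked(so));

	if (sbappendaddr(&so->so_rcv, from, m, control) == 0) {
		/* No room or no mbufs: the caller owns and frees the data. */
		m_freem(m);
		if (control != NULL)
			m_freem(control);
		return;
	}
	sorwakeup(so);
}
#endif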
/*
 * Helper for sbappendaddrchain: prepend a struct sockaddr* to
 * an mbuf chain.
 */
static inline struct mbuf *
m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
    const struct sockaddr *asa)
{
	struct mbuf *m;
	const int salen = asa->sa_len;

	KASSERT(solocked(sb->sb_so));

	/* only the first in each chain need be a pkthdr */
	m = m_gethdr(M_DONTWAIT, MT_SONAME);
	if (m == NULL)
		return NULL;
	MCLAIM(m, sb->sb_mowner);
#ifdef notyet
	if (salen > MHLEN) {
		MEXTMALLOC(m, salen, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
#else
	KASSERT(salen <= MHLEN);
#endif
	m->m_len = salen;
	memcpy(mtod(m, void *), asa, salen);
	m->m_next = m0;
	m->m_pkthdr.len = salen + m0->m_pkthdr.len;

	return m;
}

int
sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, int sbprio)
{
	struct mbuf *m, *n, *n0, *nlast;
	int error;

	KASSERT(solocked(sb->sb_so));

	/*
	 * XXX sbprio reserved for encoding priority of this* request:
	 *  SB_PRIO_NONE --> honour normal sb limits
	 *  SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
	 *	take whole chain.  Intended for large requests
	 *	that should be delivered atomically (all, or none).
	 *  SB_PRIO_OVERDRAFT -- allow a small (2*MLEN) overflow
	 *	over normal socket limits, for messages indicating
	 *	buffer overflow in earlier normal/lower-priority messages
	 *  SB_PRIO_BESTEFFORT --> ignore limits entirely.
	 *	Intended for kernel-generated messages only.
	 *	Up to generator to avoid total mbuf resource exhaustion.
	 */
	(void)sbprio;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrchain");

#ifdef notyet
	space = sbspace(sb);

	/*
	 * Enforce SB_PRIO_* limits as described above.
	 */
#endif

	n0 = NULL;
	nlast = NULL;
	for (m = m0; m; m = m->m_nextpkt) {
		struct mbuf *np;

#ifdef MBUFTRACE
		m_claimm(m, sb->sb_mowner);
#endif

		/* Prepend sockaddr to this record (m) of input chain m0 */
		n = m_prepend_sockaddr(sb, m, asa);
		if (n == NULL) {
			error = ENOBUFS;
			goto bad;
		}

		/* Append record (asa+m) to end of new chain n0 */
		if (n0 == NULL) {
			n0 = n;
		} else {
			nlast->m_nextpkt = n;
		}
		/* Keep track of last record on new chain */
		nlast = n;

		for (np = n; np; np = np->m_next)
			sballoc(sb, np);
	}

	SBLASTRECORDCHK(sb, "sbappendaddrchain 1");

	/* Drop the entire chain of (asa+m) records onto the socket */
	SBLINKRECORDCHAIN(sb, n0, nlast);

	SBLASTRECORDCHK(sb, "sbappendaddrchain 2");

	for (m = nlast; m->m_next; m = m->m_next)
		;
	sb->sb_mbtail = m;
	SBLASTMBUFCHK(sb, "sbappendaddrchain");

	return (1);

bad:
	/*
	 * On error, free the prepended addresses.  For consistency
	 * with sbappendaddr(), leave it to our caller to free
	 * the input record chain passed to us as m0.
	 */
	while ((n = n0) != NULL) {
		struct mbuf *np;

		/* Undo the sballoc() of this record */
		for (np = n; np; np = np->m_next)
			sbfree(sb, np);

		n0 = n->m_nextpkt;	/* iterate at next prepended address */
		MFREE(n, np);		/* free prepended address (not data) */
	}
	return error;
}


int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	KASSERT(solocked(sb->sb_so));

	space = 0;
	if (control == NULL)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == NULL)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;		/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");
	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	KASSERT(solocked(sb->sb_so));

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT(solocked(sb->sb_so));
	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	KASSERT(solocked(sb->sb_so));

	next = (m = sb->sb_mb) ? m->m_nextpkt : NULL;
	while (len > 0) {
		if (m == NULL) {
			if (next == NULL)
				panic("sbdrop(%p,%d): cc=%lu",
				    sb, len, sb->sb_cc);
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	KASSERT(solocked(sb->sb_so));

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}
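
/*
 * Illustrative sketch (not compiled): as noted in the comment above the
 * append routines, a reliable protocol keeps unacknowledged data in the
 * send buffer and drops acknowledged bytes with sbdrop(), then wakes any
 * writers.  The function name and the 'acked' parameter are hypothetical.
 */
#if 0
static void
example_ack_received(struct socket *so, int acked)
{
	KASSERT(solocked(so));

	sbdrop(&so->so_snd, acked);
	sowwakeup(so);
}
#endif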
/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol1(void **p, int size, int type, int level, int flags)
{
	struct cmsghdr *cp;
	struct mbuf *m;
	int space = CMSG_SPACE(size);

	if ((flags & M_DONTWAIT) && space > MCLBYTES) {
		printf("%s: message too large %d\n", __func__, space);
		return NULL;
	}

	if ((m = m_get(flags, MT_CONTROL)) == NULL)
		return NULL;
	if (space > MLEN) {
		if (space > MCLBYTES)
			MEXTMALLOC(m, space, M_WAITOK);
		else
			MCLGET(m, flags);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
	cp = mtod(m, struct cmsghdr *);
	*p = CMSG_DATA(cp);
	m->m_len = space;
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return m;
}

struct mbuf *
sbcreatecontrol(void *p, int size, int type, int level)
{
	struct mbuf *m;
	void *v;

	m = sbcreatecontrol1(&v, size, type, level, M_DONTWAIT);
	if (m == NULL)
		return NULL;
	memcpy(v, p, size);
	return m;
}

void
solockretry(struct socket *so, kmutex_t *lock)
{

	while (lock != so->so_lock) {
		mutex_exit(lock);
		lock = so->so_lock;
		mutex_enter(lock);
	}
}

bool
solocked(struct socket *so)
{

	return mutex_owned(so->so_lock);
}

bool
solocked2(struct socket *so1, struct socket *so2)
{
	kmutex_t *lock;

	lock = so1->so_lock;
	if (lock != so2->so_lock)
		return false;
	return mutex_owned(lock);
}

/*
 * sosetlock: assign a default lock to a new socket.
 */
void
sosetlock(struct socket *so)
{
	if (so->so_lock == NULL) {
		kmutex_t *lock = softnet_lock;

		so->so_lock = lock;
		mutex_obj_hold(lock);
		mutex_enter(lock);
	}
	KASSERT(solocked(so));
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	KASSERT(solocked(sb->sb_so));

	for (;;) {
		if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
			sb->sb_flags |= SB_LOCK;
			return 0;
		}
		if (wf != M_WAITOK)
			return EWOULDBLOCK;
		so = sb->sb_so;
		lock = so->so_lock;
		if ((sb->sb_flags & SB_NOINTR) != 0) {
			cv_wait(&so->so_cv, lock);
			error = 0;
		} else
			error = cv_wait_sig(&so->so_cv, lock);
		if (__predict_false(lock != so->so_lock))
			solockretry(so, lock);
		if (error != 0)
			return error;
	}
}

void
sbunlock(struct sockbuf *sb)
{
	struct socket *so;

	so = sb->sb_so;

	KASSERT(solocked(so));
	KASSERT((sb->sb_flags & SB_LOCK) != 0);

	sb->sb_flags &= ~SB_LOCK;
	cv_broadcast(&so->so_cv);
}

int
sowait(struct socket *so, bool catch_p, int timo)
{
	kmutex_t *lock;
	int error;

	KASSERT(solocked(so));
	KASSERT(catch_p || timo != 0);

	lock = so->so_lock;
	if (catch_p)
		error = cv_timedwait_sig(&so->so_cv, lock, timo);
	else
		error = cv_timedwait(&so->so_cv, lock, timo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
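
/*
 * Illustrative sketch (not compiled) of the sblock()/sbunlock() pairing:
 * long-running operations such as soreceive() take the sockbuf lock so
 * that only one thread drains the buffer at a time, while the socket
 * lock itself may be dropped and retaken around the sleep.  The function
 * name is hypothetical.
 */
#if 0
static int
example_exclusive_receive(struct socket *so)
{
	int error;

	solock(so);
	error = sblock(&so->so_rcv, M_WAITOK);
	if (error != 0) {
		sounlock(so);
		return error;
	}

	/* ... drain data from so->so_rcv here ... */

	sbunlock(&so->so_rcv);
	sounlock(so);
	return 0;
}
#endif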