/*	$NetBSD: uipc_socket2.c,v 1.121 2014/09/05 05:57:21 matt Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.2 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.121 2014/09/05 05:57:21 matt Exp $");

#include "opt_mbuftrace.h"
#include "opt_sb_max.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/pool.h>
#include <sys/uidinfo.h>

/*
 * Primitive routines for operating on sockets and socket buffers.
 *
 * Connection life-cycle:
 *
 *	Normal sequence from the active (originating) side:
 *
 *	- soisconnecting() is called during processing of connect() call,
 *	- resulting in an eventual call to soisconnected() if/when the
 *	  connection is established.
 *
 *	When the connection is torn down during processing of disconnect():
 *
 *	- soisdisconnecting() is called and,
 *	- soisdisconnected() is called when the connection to the peer
 *	  is totally severed.
 *
 *	The semantics of these routines are such that connectionless protocols
 *	can call soisconnected() and soisdisconnected() only, bypassing the
 *	in-progress calls when setting up a ``connection'' takes no time.
 *
 *	From the passive side, a socket is created with two queues of sockets:
 *
 *	- so_q0 (0) for partial connections (i.e. connections in progress)
 *	- so_q (1) for connections already made and awaiting user acceptance.
 *
 *	As a protocol is preparing incoming connections, it creates a socket
 *	structure queued on so_q0 by calling sonewconn().  When the connection
 *	is established, soisconnected() is called, and transfers the
 *	socket structure to so_q, making it available to accept().
 *
 *	If a socket is closed with sockets on either so_q0 or so_q, these
 *	sockets are dropped.
 *
 * Locking rules and assumptions:
 *
 * o socket::so_lock can change on the fly.  The low level routines used
 *   to lock sockets are aware of this.  When so_lock is acquired, the
 *   routine locking must check to see if so_lock still points to the
 *   lock that was acquired.  If so_lock has changed in the meantime, the
 *   now irrelevant lock that was acquired must be dropped and the lock
 *   operation retried.  Although not proven here, this is completely safe
 *   on a multiprocessor system, even with relaxed memory ordering, given
 *   the next two rules:
 *
 * o In order to mutate so_lock, the lock pointed to by the current value
 *   of so_lock must be held: i.e., the socket must be held locked by the
 *   changing thread.  The thread must issue membar_exit() to prevent
 *   memory accesses being reordered, and can set so_lock to the desired
 *   value.  If the lock pointed to by the new value of so_lock is not
 *   held by the changing thread, the socket must then be considered
 *   unlocked.
 *
 * o If so_lock is mutated, and the previous lock referred to by so_lock
 *   could still be visible to other threads in the system (e.g. via file
 *   descriptor or protocol-internal reference), then the old lock must
 *   remain valid until the socket and/or protocol control block has been
 *   torn down.
 *
 * o If a socket has a non-NULL so_head value (i.e. is in the process of
 *   connecting), then locking the socket must also lock the socket pointed
 *   to by so_head: their lock pointers must match.
 *
 * o If a socket has connections in progress (so_q, so_q0 not empty) then
 *   locking the socket must also lock the sockets attached to both queues.
 *   Again, their lock pointers must match.
 *
 * o Beyond the initial lock assignment in socreate(), assigning locks to
 *   sockets is the responsibility of the individual protocols / protocol
 *   domains.
 */
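
/*
 * Illustrative sketch (not compiled; an assumed caller, not part of this
 * file): the acquire/check/retry dance described above, as a user of the
 * low-level locking routines would perform it.  It mirrors solockretry()
 * further down in this file.
 *
 *	kmutex_t *lock;
 *
 *	for (;;) {
 *		lock = so->so_lock;
 *		mutex_enter(lock);
 *		if (__predict_true(lock == so->so_lock))
 *			break;		// so_lock unchanged: socket is locked
 *		mutex_exit(lock);	// so_lock changed underneath us: retry
 *	}
 */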

static pool_cache_t	socket_cache;
u_long			sb_max = SB_MAX;	/* maximum socket buffer size */
static u_long		sb_max_adj;		/* adjusted sb_max */

void
soisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

void
soisconnected(struct socket *so)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	KASSERT(head == NULL || solocked2(so, head));

	so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTED;
	if (head && so->so_onq == &head->so_q0) {
		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
			/*
			 * Re-enqueue and wake up any waiters, e.g.
			 * processes blocking on accept().
			 */
			soqremque(so, 0);
			soqinsque(head, so, 1);
			sorwakeup(head);
			cv_broadcast(&head->so_cv);
		} else {
			so->so_upcall =
			    head->so_accf->so_accept_filter->accf_callback;
			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
			so->so_rcv.sb_flags |= SB_UPCALL;
			so->so_options &= ~SO_ACCEPTFILTER;
			(*so->so_upcall)(so, so->so_upcallarg,
			    POLLIN|POLLRDNORM, M_DONTWAIT);
		}
	} else {
		cv_broadcast(&so->so_cv);
		sorwakeup(so);
		sowwakeup(so);
	}
}

void
soisdisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soisdisconnected(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soinit2(void)
{

	socket_cache = pool_cache_init(sizeof(struct socket), 0, 0, 0,
	    "socket", NULL, IPL_SOFTNET, NULL, NULL, NULL);
}

/*
 * sonewconn: accept a new connection.
 *
 * When an attempt at a new connection is noted on a socket which accepts
 * connections, sonewconn(9) is called.  If the connection is possible
 * (subject to space constraints, etc) then we allocate a new structure,
 * properly linked into the data structure of the original socket.
 *
 * => If 'soready' is true, then socket will become ready for accept() i.e.
 *    inserted into the so_q queue, SS_ISCONNECTED set and waiters awoken.
 * => May be called from soft-interrupt context.
 * => Listening socket should be locked.
 * => Returns the new socket locked.
 */
struct socket *
sonewconn(struct socket *head, bool soready)
{
	struct socket *so;
	int soqueue, error;

	KASSERT(solocked(head));

	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) {
		/* Listen queue overflow. */
		return NULL;
	}
	if ((head->so_options & SO_ACCEPTFILTER) != 0) {
		soready = false;
	}
	soqueue = soready ? 1 : 0;

	if ((so = soget(false)) == NULL) {
		return NULL;
	}
	so->so_type = head->so_type;
	so->so_options = head->so_options & ~SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uidinfo = head->so_uidinfo;
	so->so_cpid = head->so_cpid;

	/*
	 * Share the lock with the listening socket; it may get unshared
	 * once the connection is complete.
	 */
	mutex_obj_hold(head->so_lock);
	so->so_lock = head->so_lock;

	/*
	 * Reserve the space for socket buffers.
	 */
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		goto out;
	}
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & (SB_AUTOSIZE | SB_ASYNC);
	so->so_snd.sb_flags |= head->so_snd.sb_flags & (SB_AUTOSIZE | SB_ASYNC);

	/*
	 * Finally, perform the protocol attach.  Note: a new socket
	 * lock may be assigned at this point (if so, it will be held).
	 */
	error = (*so->so_proto->pr_usrreqs->pr_attach)(so, 0);
	if (error) {
out:
		KASSERT(solocked(so));
		KASSERT(so->so_accf == NULL);
		soput(so);

		/* Note: the listening socket shall stay locked. */
		KASSERT(solocked(head));
		return NULL;
	}
	KASSERT(solocked2(head, so));

	/*
	 * Insert into the queue.  If ready, update the connection status
	 * and wake up any waiters, e.g. processes blocking on accept().
	 */
	soqinsque(head, so, soqueue);
	if (soready) {
		so->so_state |= SS_ISCONNECTED;
		sorwakeup(head);
		cv_broadcast(&head->so_cv);
	}
	return so;
}

struct socket *
soget(bool waitok)
{
	struct socket *so;

	so = pool_cache_get(socket_cache, (waitok ? PR_WAITOK : PR_NOWAIT));
	if (__predict_false(so == NULL))
		return (NULL);
	memset(so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	cv_init(&so->so_cv, "socket");
	cv_init(&so->so_rcv.sb_cv, "netio");
	cv_init(&so->so_snd.sb_cv, "netio");
	selinit(&so->so_rcv.sb_sel);
	selinit(&so->so_snd.sb_sel);
	so->so_rcv.sb_so = so;
	so->so_snd.sb_so = so;
	return so;
}

void
soput(struct socket *so)
{

	KASSERT(!cv_has_waiters(&so->so_cv));
	KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
	KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
	seldestroy(&so->so_rcv.sb_sel);
	seldestroy(&so->so_snd.sb_sel);
	mutex_obj_free(so->so_lock);
	cv_destroy(&so->so_cv);
	cv_destroy(&so->so_rcv.sb_cv);
	cv_destroy(&so->so_snd.sb_cv);
	pool_cache_put(socket_cache, so);
}
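
/*
 * Illustrative sketch (hypothetical protocol input code, names assumed):
 * how a connection-oriented protocol typically drives the life-cycle
 * documented above using sonewconn() and soisconnected().
 *
 *	// connection request arrived on listening socket 'head' (locked)
 *	so = sonewconn(head, false);	// queued on so_q0, still connecting
 *	if (so == NULL)
 *		return;			// queue overflow or out of resources
 *	...
 *	// later, once the handshake completes:
 *	soisconnected(so);		// moves 'so' to so_q, wakes accept()
 */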

/*
 * soqinsque: insert socket of a new connection into the specified
 * accept queue of the listening socket (head).
 *
 *	q = 0: queue of partial connections
 *	q = 1: queue of incoming connections
 */
void
soqinsque(struct socket *head, struct socket *so, int q)
{
	KASSERT(q == 0 || q == 1);
	KASSERT(solocked2(head, so));
	KASSERT(so->so_onq == NULL);
	KASSERT(so->so_head == NULL);

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

/*
 * soqremque: remove socket from the specified queue.
 *
 * => Returns true if socket was removed from the specified queue.
 * => False if socket was not removed (because it was in other queue).
 */
bool
soqremque(struct socket *so, int q)
{
	struct socket *head = so->so_head;

	KASSERT(q == 0 || q == 1);
	KASSERT(solocked(so));
	KASSERT(so->so_onq != NULL);
	KASSERT(head != NULL);

	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return false;
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return false;
		head->so_qlen--;
	}
	KASSERT(solocked2(so, head));
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return true;
}

/*
 * socantsendmore: indicates that no more data will be sent on the
 * socket; it is normally applied to a socket by the protocol code
 * when the user informs the system that no more data is to be sent
 * (i.e. during pr_shutdown()).
 */
void
socantsendmore(struct socket *so)
{
	KASSERT(solocked(so));

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

/*
 * socantrcvmore(): indicates that no more data will be received and
 * will normally be applied to the socket by a protocol when it detects
 * that the peer will send no more data.  Data queued for reading in
 * the socket may yet be read.
 */
void
socantrcvmore(struct socket *so)
{
	KASSERT(solocked(so));

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(struct sockbuf *sb)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	so = sb->sb_so;

	KASSERT(solocked(so));

	sb->sb_flags |= SB_NOTIFY;
	lock = so->so_lock;
	if ((sb->sb_flags & SB_NOINTR) != 0)
		error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
	else
		error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}

/*
 * Wake up processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket buffer has the SB_ASYNC flag set.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb, int code)
{
	int band;

	KASSERT(solocked(so));
	KASSERT(sb->sb_so == so);

	if (code == POLL_IN)
		band = POLLIN|POLLRDNORM;
	else
		band = POLLOUT|POLLWRNORM;
	sb->sb_flags &= ~SB_NOTIFY;
	selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
	cv_broadcast(&sb->sb_cv);
	if (sb->sb_flags & SB_ASYNC)
		fownsignal(so->so_pgid, SIGIO, code, band, so);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, band, M_DONTWAIT);
}
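
/*
 * Illustrative sketch (assumed consumer/producer code, not part of this
 * file): the usual pairing of sbwait() with the sorwakeup()/sowwakeup()
 * wrappers around sowakeup().
 *
 *	// consumer (e.g. a receive path), socket locked:
 *	while (so->so_rcv.sb_cc < so->so_rcv.sb_lowat) {
 *		error = sbwait(&so->so_rcv);	// sleeps on sb_cv
 *		if (error)
 *			return error;
 *	}
 *
 *	// producer (protocol input), socket locked:
 *	sbappend(&so->so_rcv, m);
 *	sorwakeup(so);			// ends up in sowakeup(.., POLL_IN)
 */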

/*
 * Reset a socket's lock pointer.  Wake all threads waiting on the
 * socket's condition variables so that they can restart their waits
 * using the new lock.  The existing lock must be held.
 */
void
solockreset(struct socket *so, kmutex_t *lock)
{

	KASSERT(solocked(so));

	so->so_lock = lock;
	cv_broadcast(&so->so_snd.sb_cv);
	cv_broadcast(&so->so_rcv.sb_cv);
	cv_broadcast(&so->so_cv);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbufs must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbufs must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space for the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

int
sb_max_set(u_long new_sbmax)
{
	int s;

	if (new_sbmax < (16 * 1024))
		return (EINVAL);

	s = splsoftnet();
	sb_max = new_sbmax;
	sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
	splx(s);

	return (0);
}
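
/*
 * Illustrative sketch (hypothetical pr_attach code; the 'example_*' names
 * are assumptions): how a protocol typically reserves buffer space when a
 * socket is created, per the note above.  soreserve() below applies
 * sbreserve() to both the send and receive buffers.
 *
 *	static int
 *	example_attach(struct socket *so, int proto)
 *	{
 *		sosetlock(so);
 *		return soreserve(so, example_sendspace, example_recvspace);
 *	}
 */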

int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{
	KASSERT(so->so_pcb == NULL || solocked(so));

	/*
	 * There's at least one application (the configure script of
	 * screen) which expects a fifo to be writable even if it already
	 * has "some" bytes in its buffer, so we want to make sure that
	 * (hiwat - lowat) >= (some bytes).
	 *
	 * PIPE_BUF here is an arbitrary value chosen as (some bytes) above.
	 * We expect it's large enough for such applications.
	 */
	u_long lowat = MAX(sock_loan_thresh, MCLBYTES);
	u_long hiwat = lowat + PIPE_BUF;

	if (sndcc < hiwat)
		sndcc = hiwat;
	if (sbreserve(&so->so_snd, sndcc, so) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = lowat;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
 bad2:
	sbrelease(&so->so_snd, so);
 bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
{
	struct lwp *l = curlwp; /* XXX */
	rlim_t maxcc;
	struct uidinfo *uidinfo;

	KASSERT(so->so_pcb == NULL || solocked(so));
	KASSERT(sb->sb_so == so);
	KASSERT(sb_max_adj != 0);

	if (cc == 0 || cc > sb_max_adj)
		return (0);

	maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;

	uidinfo = so->so_uidinfo;
	if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
		return 0;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.  We do not assert
 * that the socket is held locked here: see sorflush().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	KASSERT(sb->sb_so == so);

	sbflush(sb);
	(void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
	sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, by comparing the value returned by sbspace() with
 * the amount of data to be added.  sbappendrecord() differs from sbappend()
 * in that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer, and
 * then removed from the socket buffer with sbdrop() or sbdroprecord()
 * when the data is acknowledged by the peer.
 */
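
/*
 * Illustrative sketch (hypothetical protocol input code): the caller-side
 * pattern described above for the append routines that do not check
 * space themselves.
 *
 *	// socket locked; 'm' holds one incoming message
 *	if (sbspace(&so->so_rcv) < m->m_pkthdr.len) {
 *		m_freem(m);			// no room: drop the message
 *	} else {
 *		sbappendrecord(&so->so_rcv, m);	// starts a new record
 *		sorwakeup(so);			// notify readers/poll/SIGIO
 *	}
 */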

#ifdef SOCKBUF_DEBUG
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link a chain of records onto a socket buffer
 */
#define	SBLINKRECORDCHAIN(sb, m0, mlast)				\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (mlast);					\
} while (/*CONSTCOND*/0)


#define	SBLINKRECORD(sb, m0)						\
    SBLINKRECORDCHAIN(sb, m0, m0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * with the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	if (m == NULL)
		return;

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KASSERT(solocked(sb->sb_so));
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m, *m2;
	u_long len, mbcnt;

	KASSERT(solocked(sb->sb_so));

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_nextpkt) {
		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
			len += m2->m_len;
			mbcnt += MSIZE;
			if (m2->m_flags & M_EXT)
				mbcnt += m2->m_ext.ext_size;
			if (m2->m_nextpkt != NULL)
				panic("sbcheck nextpkt");
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	KASSERT(solocked(sb->sb_so));

	if (m0 == NULL)
		return;

#ifdef MBUFTRACE
	m_claimm(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	KASSERT(solocked(sb->sb_so));

	if (m0 == NULL)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}
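
/*
 * Illustrative sketch (hypothetical stream-protocol input code): the
 * single-record fast path that sbappendstream() above is intended for.
 *
 *	// socket locked; 'm' holds in-sequence stream data
 *	sbappendstream(&so->so_rcv, m);	// keeps sb_mb == sb_lastrecord
 *	sorwakeup(so);
 */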

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
	struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	KASSERT(solocked(sb->sb_so));

	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claimm(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == NULL)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	m = m_get(M_DONTWAIT, MT_SONAME);
	if (m == NULL)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, void *), asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");
	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}

/*
 * Helper for sbappendaddrchain: prepend a struct sockaddr* to
 * an mbuf chain.
 */
static inline struct mbuf *
m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
    const struct sockaddr *asa)
{
	struct mbuf *m;
	const int salen = asa->sa_len;

	KASSERT(solocked(sb->sb_so));

	/* only the first in each chain need be a pkthdr */
	m = m_gethdr(M_DONTWAIT, MT_SONAME);
	if (m == NULL)
		return NULL;
	MCLAIM(m, sb->sb_mowner);
#ifdef notyet
	if (salen > MHLEN) {
		MEXTMALLOC(m, salen, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
#else
	KASSERT(salen <= MHLEN);
#endif
	m->m_len = salen;
	memcpy(mtod(m, void *), asa, salen);
	m->m_next = m0;
	m->m_pkthdr.len = salen + m0->m_pkthdr.len;

	return m;
}
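
/*
 * Illustrative sketch (hypothetical datagram input code; 'src' and 'opts'
 * are assumptions): the usual caller-side pattern for sbappendaddr(),
 * which checks for space itself and returns 0 on failure.
 *
 *	// socket locked; 'src' is the sender's address, 'm' the payload
 *	if (sbappendaddr(&so->so_rcv, src, m, opts) == 0) {
 *		m_freem(m);		// no space or no mbufs: caller frees
 *		m_freem(opts);
 *	} else {
 *		sorwakeup(so);
 *	}
 */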

int
sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, int sbprio)
{
	struct mbuf *m, *n, *n0, *nlast;
	int error;

	KASSERT(solocked(sb->sb_so));

	/*
	 * XXX sbprio reserved for encoding priority of this request:
	 *   SB_PRIO_NONE --> honour normal sb limits
	 *   SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
	 *	take whole chain.  Intended for large requests
	 *	that should be delivered atomically (all, or none).
	 *   SB_PRIO_OVERDRAFT --> allow a small (2*MLEN) overflow
	 *	over normal socket limits, for messages indicating
	 *	buffer overflow in earlier normal/lower-priority messages.
	 *   SB_PRIO_BESTEFFORT --> ignore limits entirely.
	 *	Intended for kernel-generated messages only.
	 *	Up to generator to avoid total mbuf resource exhaustion.
	 */
	(void)sbprio;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrchain");

#ifdef notyet
	space = sbspace(sb);

	/*
	 * Enforce SB_PRIO_* limits as described above.
	 */
#endif

	n0 = NULL;
	nlast = NULL;
	for (m = m0; m; m = m->m_nextpkt) {
		struct mbuf *np;

#ifdef MBUFTRACE
		m_claimm(m, sb->sb_mowner);
#endif

		/* Prepend sockaddr to this record (m) of input chain m0 */
		n = m_prepend_sockaddr(sb, m, asa);
		if (n == NULL) {
			error = ENOBUFS;
			goto bad;
		}

		/* Append record (asa+m) to end of new chain n0 */
		if (n0 == NULL) {
			n0 = n;
		} else {
			nlast->m_nextpkt = n;
		}
		/* Keep track of last record on new chain */
		nlast = n;

		for (np = n; np; np = np->m_next)
			sballoc(sb, np);
	}

	SBLASTRECORDCHK(sb, "sbappendaddrchain 1");

	/* Drop the entire chain of (asa+m) records onto the socket */
	SBLINKRECORDCHAIN(sb, n0, nlast);

	SBLASTRECORDCHK(sb, "sbappendaddrchain 2");

	for (m = nlast; m->m_next; m = m->m_next)
		;
	sb->sb_mbtail = m;
	SBLASTMBUFCHK(sb, "sbappendaddrchain");

	return (1);

bad:
	/*
	 * On error, free the prepended addresses.  For consistency
	 * with sbappendaddr(), leave it to our caller to free
	 * the input record chain passed to us as m0.
	 */
	while ((n = n0) != NULL) {
		struct mbuf *np;

		/* Undo the sballoc() of this record */
		for (np = n; np; np = np->m_next)
			sbfree(sb, np);

		n0 = n->m_nextpkt;	/* iterate at next prepended address */
		MFREE(n, np);		/* free prepended address (not data) */
	}
	return error;
}


int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	KASSERT(solocked(sb->sb_so));

	space = 0;
	if (control == NULL)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == NULL)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");
	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	KASSERT(solocked(sb->sb_so));

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT(solocked(sb->sb_so));
	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	KASSERT(solocked(sb->sb_so));

	next = (m = sb->sb_mb) ? m->m_nextpkt : NULL;
	while (len > 0) {
		if (m == NULL) {
			if (next == NULL)
				panic("sbdrop(%p,%d): cc=%lu",
				    sb, len, sb->sb_cc);
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	KASSERT(solocked(sb->sb_so));

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}
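
/*
 * Illustrative sketch (hypothetical reliable-protocol code): dropping
 * acknowledged data from the front of the send buffer, per the note
 * above the append routines.
 *
 *	// socket locked; 'acked' bytes were acknowledged by the peer
 *	sbdrop(&so->so_snd, acked);
 *	sowwakeup(so);			// writers may have room again
 */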

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol1(void **p, int size, int type, int level, int flags)
{
	struct cmsghdr *cp;
	struct mbuf *m;
	int space = CMSG_SPACE(size);

	if ((flags & M_DONTWAIT) && space > MCLBYTES) {
		printf("%s: message too large %d\n", __func__, space);
		return NULL;
	}

	if ((m = m_get(flags, MT_CONTROL)) == NULL)
		return NULL;
	if (space > MLEN) {
		if (space > MCLBYTES)
			MEXTMALLOC(m, space, M_WAITOK);
		else
			MCLGET(m, flags);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
	cp = mtod(m, struct cmsghdr *);
	*p = CMSG_DATA(cp);
	m->m_len = space;
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return m;
}

struct mbuf *
sbcreatecontrol(void *p, int size, int type, int level)
{
	struct mbuf *m;
	void *v;

	m = sbcreatecontrol1(&v, size, type, level, M_DONTWAIT);
	if (m == NULL)
		return NULL;
	memcpy(v, p, size);
	return m;
}

void
solockretry(struct socket *so, kmutex_t *lock)
{

	while (lock != so->so_lock) {
		mutex_exit(lock);
		lock = so->so_lock;
		mutex_enter(lock);
	}
}

bool
solocked(struct socket *so)
{

	return mutex_owned(so->so_lock);
}

bool
solocked2(struct socket *so1, struct socket *so2)
{
	kmutex_t *lock;

	lock = so1->so_lock;
	if (lock != so2->so_lock)
		return false;
	return mutex_owned(lock);
}

/*
 * sosetlock: assign a default lock to a new socket.
 */
void
sosetlock(struct socket *so)
{
	if (so->so_lock == NULL) {
		kmutex_t *lock = softnet_lock;

		so->so_lock = lock;
		mutex_obj_hold(lock);
		mutex_enter(lock);
	}
	KASSERT(solocked(so));
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	KASSERT(solocked(sb->sb_so));

	for (;;) {
		if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
			sb->sb_flags |= SB_LOCK;
			return 0;
		}
		if (wf != M_WAITOK)
			return EWOULDBLOCK;
		so = sb->sb_so;
		lock = so->so_lock;
		if ((sb->sb_flags & SB_NOINTR) != 0) {
			cv_wait(&so->so_cv, lock);
			error = 0;
		} else
			error = cv_wait_sig(&so->so_cv, lock);
		if (__predict_false(lock != so->so_lock))
			solockretry(so, lock);
		if (error != 0)
			return error;
	}
}

void
sbunlock(struct sockbuf *sb)
{
	struct socket *so;

	so = sb->sb_so;

	KASSERT(solocked(so));
	KASSERT((sb->sb_flags & SB_LOCK) != 0);

	sb->sb_flags &= ~SB_LOCK;
	cv_broadcast(&so->so_cv);
}

int
sowait(struct socket *so, bool catch_p, int timo)
{
	kmutex_t *lock;
	int error;

	KASSERT(solocked(so));
	KASSERT(catch_p || timo != 0);

	lock = so->so_lock;
	if (catch_p)
		error = cv_timedwait_sig(&so->so_cv, lock, timo);
	else
		error = cv_timedwait(&so->so_cv, lock, timo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
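
/*
 * Illustrative sketch (hypothetical protocol code): building ancillary
 * data with sbcreatecontrol(), e.g. a timestamp control message to be
 * queued together with a received datagram via sbappendaddr().
 *
 *	struct timeval tv;
 *	struct mbuf *control;
 *
 *	microtime(&tv);
 *	control = sbcreatecontrol(&tv, sizeof(tv), SCM_TIMESTAMP, SOL_SOCKET);
 *	if (control != NULL) {
 *		// pass 'control' to sbappendaddr() along with the data
 *	}
 */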