/*	$NetBSD: uipc_socket2.c,v 1.100 2008/10/24 22:23:20 dyoung Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.2 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.100 2008/10/24 22:23:20 dyoung Exp $");

#include "opt_mbuftrace.h"
#include "opt_sb_max.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/pool.h>
#include <sys/uidinfo.h>

/*
 * Primitive routines for operating on sockets and socket buffers.
 *
 * Locking rules and assumptions:
 *
 * o socket::so_lock can change on the fly.  The low level routines used
 *   to lock sockets are aware of this.  When so_lock is acquired, the
 *   routine locking must check to see if so_lock still points to the
 *   lock that was acquired.  If so_lock has changed in the meantime, the
 *   now irrelevant lock that was acquired must be dropped and the lock
 *   operation retried.  Although not proven here, this is completely safe
 *   on a multiprocessor system, even with relaxed memory ordering, given
 *   the next two rules:
 *
 * o In order to mutate so_lock, the lock pointed to by the current value
 *   of so_lock must be held: i.e., the socket must be held locked by the
 *   changing thread.  The thread must issue membar_exit() to prevent
 *   memory accesses being reordered, and can then set so_lock to the
 *   desired value.  If the lock pointed to by the new value of so_lock is
 *   not held by the changing thread, the socket must then be considered
 *   unlocked.
 *
 * o If so_lock is mutated, and the previous lock referred to by so_lock
 *   could still be visible to other threads in the system (e.g. via file
 *   descriptor or protocol-internal reference), then the old lock must
 *   remain valid until the socket and/or protocol control block has been
 *   torn down.
 *
 * o If a socket has a non-NULL so_head value (i.e. is in the process of
 *   connecting), then locking the socket must also lock the socket pointed
 *   to by so_head: their lock pointers must match.
 *
 * o If a socket has connections in progress (so_q, so_q0 not empty) then
 *   locking the socket must also lock the sockets attached to both queues.
 *   Again, their lock pointers must match.
 *
 * o Beyond the initial lock assignment in socreate(), assigning locks to
 *   sockets is the responsibility of the individual protocols / protocol
 *   domains.
 */
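
/*
 * To illustrate the rules above, a protocol following them would change
 * a socket's lock roughly as follows, with the old lock held on entry.
 * This is an editorial sketch, not code used by this file; "newlock" is
 * a hypothetical replacement lock:
 *
 *	KASSERT(solocked(so));		(old lock must be held)
 *	membar_exit();			(order prior writes before...)
 *	so->so_lock = newlock;		(...publishing the new pointer)
 *
 * and a thread racing with the change re-validates after acquiring:
 *
 *	lock = so->so_lock;
 *	mutex_enter(lock);
 *	if (lock != so->so_lock)
 *		solockretry(so, lock);	(drop the stale lock and retry)
 *
 * Compare solockreset() and solockretry() later in this file.
 */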

static pool_cache_t	socket_cache;

u_long	sb_max = SB_MAX;	/* maximum socket buffer size */
static u_long	sb_max_adj;	/* adjusted sb_max */

/*
 * Procedures to manipulate state flags of socket and do appropriate
 * wakeups.  Normal sequence from the active (originating) side is that
 * soisconnecting() is called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with two queues of sockets:
 * so_q0 for connections in progress and so_q for connections already
 * made and awaiting user acceptance.  As a protocol is preparing
 * incoming connections, it creates a socket structure queued on so_q0
 * by calling sonewconn().  When the connection is established,
 * soisconnected() is called, and transfers the socket structure to
 * so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either so_q0 or so_q, these
 * sockets are dropped.
 *
 * If higher level protocols are implemented in the kernel, the wakeups
 * done here will sometimes cause software-interrupt process scheduling.
 */

void
soisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

void
soisconnected(struct socket *so)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	KASSERT(head == NULL || solocked2(so, head));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && so->so_onq == &head->so_q0) {
		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
			soqremque(so, 0);
			soqinsque(head, so, 1);
			sorwakeup(head);
			cv_broadcast(&head->so_cv);
		} else {
			so->so_upcall =
			    head->so_accf->so_accept_filter->accf_callback;
			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
			so->so_rcv.sb_flags |= SB_UPCALL;
			so->so_options &= ~SO_ACCEPTFILTER;
			(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
		}
	} else {
		cv_broadcast(&so->so_cv);
		sorwakeup(so);
		sowwakeup(so);
	}
}

void
soisdisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soisdisconnected(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soinit2(void)
{

	socket_cache = pool_cache_init(sizeof(struct socket), 0, 0, 0,
	    "socket", NULL, IPL_SOFTNET, NULL, NULL, NULL);
}

/*
 * When an attempt at a new connection is noted on a socket which accepts
 * connections, sonewconn() is called.  If the connection is possible
 * (subject to space constraints, etc.) then we allocate a new structure,
 * properly linked into the data structure of the original socket, and
 * return this.  connstatus may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED.
 */
struct socket *
sonewconn(struct socket *head, int connstatus)
{
	struct socket *so;
	int soqueue, error;

	KASSERT(solocked(head));

	if ((head->so_options & SO_ACCEPTFILTER) != 0)
		connstatus = 0;
	soqueue = connstatus ? 1 : 0;
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return NULL;
	so = soget(false);
	if (so == NULL)
		return NULL;
	mutex_obj_hold(head->so_lock);
	so->so_lock = head->so_lock;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_nbio = head->so_nbio;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uidinfo = head->so_uidinfo;
	so->so_egid = head->so_egid;
	so->so_cpid = head->so_cpid;
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
	so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
	soqinsque(head, so, soqueue);
	error = (*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL,
	    NULL, NULL);
	KASSERT(solocked(so));
	if (error != 0) {
		(void) soqremque(so, soqueue);
		/*
		 * Remove accept filter if one is present.
		 * XXX Is this really needed?
		 */
		if (so->so_accf != NULL)
			(void)accept_filt_clear(so);
		soput(so);
		return NULL;
	}
	if (connstatus) {
		sorwakeup(head);
		cv_broadcast(&head->so_cv);
		so->so_state |= connstatus;
	}
	return so;
}
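
/*
 * A worked example of the queue limit in sonewconn() above: new
 * connections are admitted while so_qlen + so_q0len is at most
 * 3 * so_qlimit / 2, i.e. 1.5 times the listen(2) backlog.  For a
 * backlog of 128:
 *
 *	3 * 128 / 2 = 192
 *
 * so a new embryonic connection is refused (NULL is returned) once the
 * completed and in-progress queues together already hold more than 192
 * sockets.
 */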

struct socket *
soget(bool waitok)
{
	struct socket *so;

	so = pool_cache_get(socket_cache, (waitok ? PR_WAITOK : PR_NOWAIT));
	if (__predict_false(so == NULL))
		return (NULL);
	memset(so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	cv_init(&so->so_cv, "socket");
	cv_init(&so->so_rcv.sb_cv, "netio");
	cv_init(&so->so_snd.sb_cv, "netio");
	selinit(&so->so_rcv.sb_sel);
	selinit(&so->so_snd.sb_sel);
	so->so_rcv.sb_so = so;
	so->so_snd.sb_so = so;
	return so;
}

void
soput(struct socket *so)
{

	KASSERT(!cv_has_waiters(&so->so_cv));
	KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
	KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
	seldestroy(&so->so_rcv.sb_sel);
	seldestroy(&so->so_snd.sb_sel);
	mutex_obj_free(so->so_lock);
	cv_destroy(&so->so_cv);
	cv_destroy(&so->so_rcv.sb_cv);
	cv_destroy(&so->so_snd.sb_cv);
	pool_cache_put(socket_cache, so);
}

void
soqinsque(struct socket *head, struct socket *so, int q)
{

	KASSERT(solocked2(head, so));

#ifdef DIAGNOSTIC
	if (so->so_onq != NULL)
		panic("soqinsque");
#endif

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

int
soqremque(struct socket *so, int q)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return (0);
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return (0);
		head->so_qlen--;
	}
	KASSERT(solocked2(so, head));
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return (1);
}

/*
 * socantsendmore() indicates that no more data will be sent on the
 * socket; it is normally applied to the socket by the protocol code
 * when the user informs the system that no more data is to be sent
 * (in the case of PRU_SHUTDOWN).  socantrcvmore() indicates that no
 * more data will be received, and will normally be applied to the
 * socket by a protocol when it detects that the peer will send no more
 * data.  Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

void
socantrcvmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(struct sockbuf *sb)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	so = sb->sb_so;

	KASSERT(solocked(so));

	sb->sb_flags |= SB_NOTIFY;
	lock = so->so_lock;
	if ((sb->sb_flags & SB_NOINTR) != 0)
		error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
	else
		error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
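
/*
 * Typical use of sbwait(), sketched for illustration (a simplification
 * of the receive path in soreceive(), uipc_socket.c; the socket must be
 * locked by the caller):
 *
 *	while (so->so_rcv.sb_cc == 0) {
 *		if (so->so_state & SS_CANTRCVMORE)
 *			break;
 *		if ((error = sbwait(&so->so_rcv)) != 0)
 *			return error;
 *	}
 *
 * sbwait() releases so_lock while sleeping and, because so_lock can
 * change on the fly (see the locking notes at the top of this file),
 * re-validates it via solockretry() on wakeup.
 */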

/*
 * Wakeup processes waiting on a socket buffer.  Do asynchronous
 * notification via SIGIO if the socket buffer has the SB_ASYNC flag set.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb, int code)
{
	int band;

	KASSERT(solocked(so));
	KASSERT(sb->sb_so == so);

	if (code == POLL_IN)
		band = POLLIN|POLLRDNORM;
	else
		band = POLLOUT|POLLWRNORM;
	sb->sb_flags &= ~SB_NOTIFY;
	selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
	cv_broadcast(&sb->sb_cv);
	if (sb->sb_flags & SB_ASYNC)
		fownsignal(so->so_pgid, SIGIO, code, band, so);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
}

/*
 * Reset a socket's lock pointer.  Wake all threads waiting on the
 * socket's condition variables so that they can restart their waits
 * using the new lock.  The existing lock must be held.
 */
void
solockreset(struct socket *so, kmutex_t *lock)
{

	KASSERT(solocked(so));

	so->so_lock = lock;
	cv_broadcast(&so->so_snd.sb_cv);
	cv_broadcast(&so->so_rcv.sb_cv);
	cv_broadcast(&so->so_cv);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The
 * upper level routine soreceive() expects the following conventions to
 * be observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbufs must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbufs must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space for the socket, by calling sbreserve().  This should
 * commit some of the available buffer space in the system buffer pool
 * for the socket (currently, it does nothing but enforce limits).  The
 * space should be released by calling sbrelease() when the socket is
 * destroyed.
 */
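
/*
 * As an illustration of the record layout described above, a receive
 * buffer holding two datagrams, each preceded by the sender's name,
 * looks like this (a sketch only):
 *
 *	sb_mb -> [MT_SONAME] -m_next-> [MT_DATA] -m_next-> [MT_DATA]
 *	             |
 *	         m_nextpkt
 *	             |
 *	             v
 *	         [MT_SONAME] -m_next-> [MT_DATA]
 *
 * sb_lastrecord points at the first mbuf of the final record and
 * sb_mbtail at its last mbuf; sbappendaddr() below constructs exactly
 * this kind of record.
 */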

int
sb_max_set(u_long new_sbmax)
{
	int s;

	if (new_sbmax < (16 * 1024))
		return (EINVAL);

	s = splsoftnet();
	sb_max = new_sbmax;
	sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
	splx(s);

	return (0);
}
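
/*
 * Worked example of the scaling in sb_max_set() (the constants are
 * machine-dependent; the values below are merely illustrative).
 * Assuming MSIZE = 256 and MCLBYTES = 2048, setting sb_max to 262144
 * gives:
 *
 *	sb_max_adj = 262144 * 2048 / (256 + 2048) = 233016
 *
 * sbreserve() below rejects reservations larger than sb_max_adj, so
 * the data limit is pre-scaled by the cluster-to-total storage ratio
 * and sb_mbmax, which counts mbuf storage rather than data, is
 * unlikely to become the limiting factor for well-packed buffers.
 */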

int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{

	KASSERT(so->so_lock == NULL || solocked(so));

	/*
	 * There's at least one application (a configure script of screen)
	 * which expects a fifo to be writable even if it has "some" bytes
	 * in its buffer, so we want to make sure
	 * (hiwat - lowat) >= (some bytes).
	 *
	 * PIPE_BUF here is an arbitrary value chosen as (some bytes) above.
	 * We expect it's large enough for such applications.
	 */
	u_long lowat = MAX(sock_loan_thresh, MCLBYTES);
	u_long hiwat = lowat + PIPE_BUF;

	if (sndcc < hiwat)
		sndcc = hiwat;
	if (sbreserve(&so->so_snd, sndcc, so) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = lowat;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
 bad2:
	sbrelease(&so->so_snd, so);
 bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
{
	struct lwp *l = curlwp; /* XXX */
	rlim_t maxcc;
	struct uidinfo *uidinfo;

	KASSERT(so->so_lock == NULL || solocked(so));
	KASSERT(sb->sb_so == so);
	KASSERT(sb_max_adj != 0);

	if (cc == 0 || cc > sb_max_adj)
		return (0);

	if (kauth_cred_geteuid(l->l_cred) == so->so_uidinfo->ui_uid)
		maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;
	else
		maxcc = RLIM_INFINITY;

	uidinfo = so->so_uidinfo;
	if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
		return 0;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.  We do not assert
 * that the socket is held locked here: see sorflush().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	KASSERT(sb->sb_so == so);

	sbflush(sb);
	(void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
	sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendcontrol()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer, and
 * then the data is removed from the socket buffer with sbdrop() or
 * sbdroprecord() when the data is acknowledged by the peer.
 */
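
/*
 * An illustrative sketch of the send-buffer pattern described above,
 * roughly what a reliable protocol does (simplified; locking, error
 * handling, and the details of any real protocol are omitted, and
 * "off", "len" and "acked" are hypothetical values):
 *
 *	if (sbspace(&so->so_snd) < len)
 *		(void)sbwait(&so->so_snd);	(wait for space to drain)
 *	sbappend(&so->so_snd, m);		(keep data for retransmit)
 *	n = m_copy(so->so_snd.sb_mb, off, len);	(copy for transmission)
 *	... later, when the peer acknowledges "acked" bytes:
 *	sbdrop(&so->so_snd, acked);
 *	sowwakeup(so);				(wake writers, poll, SIGIO)
 */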

#ifdef SOCKBUF_DEBUG
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link a chain of records onto a socket buffer.
 */
#define	SBLINKRECORDCHAIN(sb, m0, mlast)				\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (mlast);					\
} while (/*CONSTCOND*/0)

#define	SBLINKRECORD(sb, m0)						\
    SBLINKRECORDCHAIN(sb, m0, m0)

/*
 * Append mbuf chain m to the last record in the socket buffer sb.
 * The additional space associated with the mbuf chain is recorded
 * in sb.  Empty mbufs are discarded and mbufs are compacted where
 * possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	if (m == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KASSERT(solocked(sb->sb_so));
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m, *m2;
	u_long len, mbcnt;

	KASSERT(solocked(sb->sb_so));

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_nextpkt) {
		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
			len += m2->m_len;
			mbcnt += MSIZE;
			if (m2->m_flags & M_EXT)
				mbcnt += m2->m_ext.ext_size;
			if (m2->m_nextpkt != NULL)
				panic("sbcheck nextpkt");
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	KASSERT(solocked(sb->sb_so));

	if (m0 == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data is inserted at the beginning of the
 * sockbuf, but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	KASSERT(solocked(sb->sb_so));

	if (m0 == 0)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present, m0 must include a
 * packet header with total length.  Returns 0 if no space in sockbuf
 * or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
    struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	KASSERT(solocked(sb->sb_so));

	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claimm(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, void *), asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");
	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}
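
/*
 * Illustrative use of sbappendaddr(), loosely modelled on a datagram
 * protocol's input path ("sa" is a hypothetical pointer to the sender's
 * address and "opts" an optional control chain; this is a sketch, not a
 * quote from any protocol):
 *
 *	if (sbappendaddr(&so->so_rcv, sa, m, opts) == 0) {
 *		m_freem(m);			(no space: drop datagram)
 *		if (opts != NULL)
 *			m_freem(opts);
 *	} else
 *		sorwakeup(so);			(notify readers)
 */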

/*
 * Helper for sbappendaddrchain(): prepend a struct sockaddr* to
 * an mbuf chain.
 */
static inline struct mbuf *
m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
    const struct sockaddr *asa)
{
	struct mbuf *m;
	const int salen = asa->sa_len;

	KASSERT(solocked(sb->sb_so));

	/* only the first in each chain need be a pkthdr */
	MGETHDR(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
#ifdef notyet
	if (salen > MHLEN) {
		MEXTMALLOC(m, salen, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
#else
	KASSERT(salen <= MHLEN);
#endif
	m->m_len = salen;
	memcpy(mtod(m, void *), asa, salen);
	m->m_next = m0;
	m->m_pkthdr.len = salen + m0->m_pkthdr.len;

	return m;
}

int
sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, int sbprio)
{
	int space;
	struct mbuf *m, *n, *n0, *nlast;
	int error;

	KASSERT(solocked(sb->sb_so));

	/*
	 * XXX sbprio reserved for encoding priority of this request:
	 *  SB_PRIO_NONE --> honour normal sb limits
	 *  SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
	 *	take whole chain.  Intended for large requests
	 *	that should be delivered atomically (all, or none).
	 *  SB_PRIO_OVERDRAFT --> allow a small (2*MLEN) overflow
	 *	over normal socket limits, for messages indicating
	 *	buffer overflow in earlier normal/lower-priority messages.
	 *  SB_PRIO_BESTEFFORT --> ignore limits entirely.
	 *	Intended for kernel-generated messages only.
	 *	Up to generator to avoid total mbuf resource exhaustion.
	 */
	(void)sbprio;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrchain");

	space = sbspace(sb);

#ifdef notyet
	/*
	 * Enforce SB_PRIO_* limits as described above.
	 */
#endif

	n0 = NULL;
	nlast = NULL;
	for (m = m0; m; m = m->m_nextpkt) {
		struct mbuf *np;

#ifdef MBUFTRACE
		m_claimm(m, sb->sb_mowner);
#endif

		/* Prepend sockaddr to this record (m) of input chain m0 */
		n = m_prepend_sockaddr(sb, m, asa);
		if (n == NULL) {
			error = ENOBUFS;
			goto bad;
		}

		/* Append record (asa+m) to end of new chain n0 */
		if (n0 == NULL) {
			n0 = n;
		} else {
			nlast->m_nextpkt = n;
		}
		/* Keep track of last record on new chain */
		nlast = n;

		for (np = n; np; np = np->m_next)
			sballoc(sb, np);
	}

	SBLASTRECORDCHK(sb, "sbappendaddrchain 1");

	/* Drop the entire chain of (asa+m) records onto the socket */
	SBLINKRECORDCHAIN(sb, n0, nlast);

	SBLASTRECORDCHK(sb, "sbappendaddrchain 2");

	for (m = nlast; m->m_next; m = m->m_next)
		;
	sb->sb_mbtail = m;
	SBLASTMBUFCHK(sb, "sbappendaddrchain");

	return (1);

bad:
	/*
	 * On error, free the prepended addresses.  For consistency
	 * with sbappendaddr(), leave it to our caller to free
	 * the input record chain passed to us as m0.
	 */
	while ((n = n0) != NULL) {
		struct mbuf *np;

		/* Undo the sballoc() of this record */
		for (np = n; np; np = np->m_next)
			sbfree(sb, np);

		n0 = n->m_nextpkt;	/* iterate at next prepended address */
		MFREE(n, np);		/* free prepended address (not data) */
	}
	return 0;
}

int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	KASSERT(solocked(sb->sb_so));

	space = 0;
	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;		/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");
	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket buffer sb following mbuf n.
 * If n is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	KASSERT(solocked(sb->sb_so));

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		    (((o = m->m_next) || (o = n)) &&
		    o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT(solocked(sb->sb_so));
	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	KASSERT(solocked(sb->sb_so));

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf and move the next record
 * to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	KASSERT(solocked(sb->sb_so));

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}

/*
 * Create a "control" mbuf containing the specified data with the
 * specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(void *p, int size, int type, int level)
{
	struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE(size) > MCLBYTES) {
		printf("sbcreatecontrol: message too large %d\n", size);
		return NULL;
	}

	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	if (CMSG_SPACE(size) > MLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
	cp = mtod(m, struct cmsghdr *);
	memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}
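
/*
 * Illustrative use of sbcreatecontrol(), in the style of the kernel's
 * SO_TIMESTAMP handling (a sketch; "tv" and "control" are local
 * variables here, not names from this file):
 *
 *	struct timeval tv;
 *	struct mbuf *control;
 *
 *	microtime(&tv);
 *	control = sbcreatecontrol(&tv, sizeof(tv),
 *	    SCM_TIMESTAMP, SOL_SOCKET);
 *
 * A NULL return means no mbuf (or cluster) could be allocated, or that
 * the message was larger than MCLBYTES.
 */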

void
solockretry(struct socket *so, kmutex_t *lock)
{

	while (lock != so->so_lock) {
		mutex_exit(lock);
		lock = so->so_lock;
		mutex_enter(lock);
	}
}

bool
solocked(struct socket *so)
{

	return mutex_owned(so->so_lock);
}

bool
solocked2(struct socket *so1, struct socket *so2)
{
	kmutex_t *lock;

	lock = so1->so_lock;
	if (lock != so2->so_lock)
		return false;
	return mutex_owned(lock);
}

/*
 * Assign a default lock to a new socket.  For PRU_ATTACH, and done by
 * protocols that do not have special locking requirements.
 */
void
sosetlock(struct socket *so)
{
	kmutex_t *lock;

	if (so->so_lock == NULL) {
		lock = softnet_lock;
		so->so_lock = lock;
		mutex_obj_hold(lock);
		mutex_enter(lock);
	}

	/* In all cases, lock must be held on return from PRU_ATTACH. */
	KASSERT(solocked(so));
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	KASSERT(solocked(sb->sb_so));

	for (;;) {
		if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
			sb->sb_flags |= SB_LOCK;
			return 0;
		}
		if (wf != M_WAITOK)
			return EWOULDBLOCK;
		so = sb->sb_so;
		lock = so->so_lock;
		if ((sb->sb_flags & SB_NOINTR) != 0) {
			cv_wait(&so->so_cv, lock);
			error = 0;
		} else
			error = cv_wait_sig(&so->so_cv, lock);
		if (__predict_false(lock != so->so_lock))
			solockretry(so, lock);
		if (error != 0)
			return error;
	}
}

void
sbunlock(struct sockbuf *sb)
{
	struct socket *so;

	so = sb->sb_so;

	KASSERT(solocked(so));
	KASSERT((sb->sb_flags & SB_LOCK) != 0);

	sb->sb_flags &= ~SB_LOCK;
	cv_broadcast(&so->so_cv);
}

int
sowait(struct socket *so, int timo)
{
	kmutex_t *lock;
	int error;

	KASSERT(solocked(so));

	lock = so->so_lock;
	error = cv_timedwait_sig(&so->so_cv, lock, timo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
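
/*
 * For illustration, the usual pairing of sblock()/sbunlock() around a
 * whole I/O operation, as in soreceive()/sosend() in uipc_socket.c
 * (a simplified sketch):
 *
 *	solock(so);
 *	if ((error = sblock(&so->so_rcv, M_WAITOK)) != 0) {
 *		sounlock(so);
 *		return error;
 *	}
 *	... consume data from so->so_rcv ...
 *	sbunlock(&so->so_rcv);
 *	sounlock(so);
 *
 * SB_LOCK serializes entire operations on a socket buffer against each
 * other, while so_lock protects the individual sockbuf fields for the
 * short term.
 */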