/*	$NetBSD: uipc_socket2.c,v 1.104 2009/09/02 14:56:57 tls Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.2 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.104 2009/09/02 14:56:57 tls Exp $");

#include "opt_mbuftrace.h"
#include "opt_sb_max.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/pool.h>
#include <sys/uidinfo.h>

/*
 * Primitive routines for operating on sockets and socket buffers.
 *
 * Locking rules and assumptions:
 *
 * o socket::so_lock can change on the fly.  The low level routines used
 *   to lock sockets are aware of this.  When so_lock is acquired, the
 *   routine locking must check to see if so_lock still points to the
 *   lock that was acquired.  If so_lock has changed in the meantime, the
 *   now irrelevant lock that was acquired must be dropped and the lock
 *   operation retried.  Although not proven here, this is completely safe
 *   on a multiprocessor system, even with relaxed memory ordering, given
 *   the next two rules:
 *
 * o In order to mutate so_lock, the lock pointed to by the current value
 *   of so_lock must be held: i.e., the socket must be held locked by the
 *   changing thread.  The thread must issue membar_exit() to prevent
 *   memory accesses from being reordered, and can then set so_lock to the
 *   desired value.  If the lock pointed to by the new value of so_lock is
 *   not held by the changing thread, the socket must then be considered
 *   unlocked.
 *
 * o If so_lock is mutated, and the previous lock referred to by so_lock
 *   could still be visible to other threads in the system (e.g. via file
 *   descriptor or protocol-internal reference), then the old lock must
 *   remain valid until the socket and/or protocol control block has been
 *   torn down.
 *
 * o If a socket has a non-NULL so_head value (i.e. is in the process of
 *   connecting), then locking the socket must also lock the socket pointed
 *   to by so_head: their lock pointers must match.
 *
 * o If a socket has connections in progress (so_q, so_q0 not empty) then
 *   locking the socket must also lock the sockets attached to both queues.
 *   Again, their lock pointers must match.
 *
 * o Beyond the initial lock assignment in socreate(), assigning locks to
 *   sockets is the responsibility of the individual protocols / protocol
 *   domains.
 */
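
/*
 * Illustrative sketch of the retry rule described above: a low level
 * routine that needs the socket locked acquires so_lock, then re-checks
 * that so_lock still points at the mutex it took, retrying otherwise.
 * solockretry() later in this file performs the retry step.
 *
 *	kmutex_t *lock;
 *
 *	for (;;) {
 *		lock = so->so_lock;
 *		mutex_enter(lock);
 *		if (__predict_true(lock == so->so_lock))
 *			break;
 *		mutex_exit(lock);
 *	}
 */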

static pool_cache_t	socket_cache;

u_long	sb_max = SB_MAX;	/* maximum socket buffer size */
static u_long	sb_max_adj;	/* adjusted sb_max */

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

void
soisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

void
soisconnected(struct socket *so)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	KASSERT(head == NULL || solocked2(so, head));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && so->so_onq == &head->so_q0) {
		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
			soqremque(so, 0);
			soqinsque(head, so, 1);
			sorwakeup(head);
			cv_broadcast(&head->so_cv);
		} else {
			so->so_upcall =
			    head->so_accf->so_accept_filter->accf_callback;
			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
			so->so_rcv.sb_flags |= SB_UPCALL;
			so->so_options &= ~SO_ACCEPTFILTER;
			(*so->so_upcall)(so, so->so_upcallarg,
			    POLLIN|POLLRDNORM, M_DONTWAIT);
		}
	} else {
		cv_broadcast(&so->so_cv);
		sorwakeup(so);
		sowwakeup(so);
	}
}

void
soisdisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soisdisconnected(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soinit2(void)
{

	socket_cache = pool_cache_init(sizeof(struct socket), 0, 0, 0,
	    "socket", NULL, IPL_SOFTNET, NULL, NULL, NULL);
}
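
/*
 * Illustrative sketch of the active-side sequence described above, as a
 * connection oriented protocol might drive it (the exact points at which
 * each routine is called are protocol specific):
 *
 *	soisconnecting(so);		at PRU_CONNECT time
 *	...protocol completes its handshake with the peer...
 *	soisconnected(so);		connection established
 *	...
 *	soisdisconnecting(so);		at PRU_DISCONNECT time
 *	soisdisconnected(so);		when the peer is fully gone
 */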

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED.
 */
struct socket *
sonewconn(struct socket *head, int connstatus)
{
	struct socket *so;
	int soqueue, error;

	KASSERT(connstatus == 0 || connstatus == SS_ISCONFIRMING ||
	    connstatus == SS_ISCONNECTED);
	KASSERT(solocked(head));

	if ((head->so_options & SO_ACCEPTFILTER) != 0)
		connstatus = 0;
	soqueue = connstatus ? 1 : 0;
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return NULL;
	so = soget(false);
	if (so == NULL)
		return NULL;
	mutex_obj_hold(head->so_lock);
	so->so_lock = head->so_lock;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_nbio = head->so_nbio;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uidinfo = head->so_uidinfo;
	so->so_egid = head->so_egid;
	so->so_cpid = head->so_cpid;
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) != 0)
		goto out;
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
	so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
	soqinsque(head, so, soqueue);
	error = (*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL,
	    NULL, NULL);
	KASSERT(solocked(so));
	if (error != 0) {
		(void) soqremque(so, soqueue);
 out:
		/*
		 * Remove accept filter if one is present.
		 * XXX Is this really needed?
		 */
		if (so->so_accf != NULL)
			(void)accept_filt_clear(so);
		soput(so);
		return NULL;
	}
	if (connstatus) {
		sorwakeup(head);
		cv_broadcast(&head->so_cv);
		so->so_state |= connstatus;
	}
	return so;
}
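
/*
 * Illustrative sketch of how a protocol's input path typically uses
 * sonewconn() for a passive open; the error handling and the "drop"
 * label are placeholders for whatever the protocol actually does:
 *
 *	so = sonewconn(head, SS_ISCONNECTED);
 *	if (so == NULL)
 *		goto drop;		listen queue full or no memory
 *	...attach the new protocol control block to so...
 */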

struct socket *
soget(bool waitok)
{
	struct socket *so;

	so = pool_cache_get(socket_cache, (waitok ? PR_WAITOK : PR_NOWAIT));
	if (__predict_false(so == NULL))
		return (NULL);
	memset(so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	cv_init(&so->so_cv, "socket");
	cv_init(&so->so_rcv.sb_cv, "netio");
	cv_init(&so->so_snd.sb_cv, "netio");
	selinit(&so->so_rcv.sb_sel);
	selinit(&so->so_snd.sb_sel);
	so->so_rcv.sb_so = so;
	so->so_snd.sb_so = so;
	return so;
}

void
soput(struct socket *so)
{

	KASSERT(!cv_has_waiters(&so->so_cv));
	KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
	KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
	seldestroy(&so->so_rcv.sb_sel);
	seldestroy(&so->so_snd.sb_sel);
	mutex_obj_free(so->so_lock);
	cv_destroy(&so->so_cv);
	cv_destroy(&so->so_rcv.sb_cv);
	cv_destroy(&so->so_snd.sb_cv);
	pool_cache_put(socket_cache, so);
}

void
soqinsque(struct socket *head, struct socket *so, int q)
{

	KASSERT(solocked2(head, so));

#ifdef DIAGNOSTIC
	if (so->so_onq != NULL)
		panic("soqinsque");
#endif

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

int
soqremque(struct socket *so, int q)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return (0);
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return (0);
		head->so_qlen--;
	}
	KASSERT(solocked2(so, head));
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return (1);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in the case of PRU_SHUTDOWN).  Socantrcvmore indicates that no
 * more data will be received, and will normally be applied to the socket
 * by a protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

void
socantrcvmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(struct sockbuf *sb)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	so = sb->sb_so;

	KASSERT(solocked(so));

	sb->sb_flags |= SB_NOTIFY;
	lock = so->so_lock;
	if ((sb->sb_flags & SB_NOINTR) != 0)
		error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
	else
		error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
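
/*
 * Illustrative sketch of the canonical sbwait() loop, roughly what a
 * blocking receive path does while it waits for enough data to arrive
 * (error handling and record bookkeeping trimmed):
 *
 *	while (sb->sb_cc < sb->sb_lowat &&
 *	    (so->so_state & SS_CANTRCVMORE) == 0) {
 *		error = sbwait(sb);
 *		if (error != 0)
 *			break;
 *	}
 */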

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket buffer has the SB_ASYNC flag set.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb, int code)
{
	int band;

	KASSERT(solocked(so));
	KASSERT(sb->sb_so == so);

	if (code == POLL_IN)
		band = POLLIN|POLLRDNORM;
	else
		band = POLLOUT|POLLWRNORM;
	sb->sb_flags &= ~SB_NOTIFY;
	selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
	cv_broadcast(&sb->sb_cv);
	if (sb->sb_flags & SB_ASYNC)
		fownsignal(so->so_pgid, SIGIO, code, band, so);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, band, M_DONTWAIT);
}

/*
 * Reset a socket's lock pointer.  Wake all threads waiting on the
 * socket's condition variables so that they can restart their waits
 * using the new lock.  The existing lock must be held.
 */
void
solockreset(struct socket *so, kmutex_t *lock)
{

	KASSERT(solocked(so));

	so->so_lock = lock;
	cv_broadcast(&so->so_snd.sb_cv);
	cv_broadcast(&so->so_rcv.sb_cv);
	cv_broadcast(&so->so_cv);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The
 * upper level routine soreceive() expects the following conventions to
 * be observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

int
sb_max_set(u_long new_sbmax)
{
	int s;

	if (new_sbmax < (16 * 1024))
		return (EINVAL);

	s = splsoftnet();
	sb_max = new_sbmax;
	sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
	splx(s);

	return (0);
}
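
/*
 * Illustrative arithmetic for sb_max_adj: the adjustment scales the
 * user-visible limit by the data density of a cluster mbuf.  Assuming
 * the common values MSIZE = 256 and MCLBYTES = 2048, the factor is
 * 2048 / (256 + 2048) = 8/9, so the effective data limit is 8/9 of
 * sb_max.
 */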

int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{

	KASSERT(so->so_lock == NULL || solocked(so));

	/*
	 * There's at least one application (the configure script of
	 * screen) which expects a fifo to be writable even if it already
	 * has "some" bytes in its buffer, so we want to make sure that
	 * (hiwat - lowat) >= (some bytes).
	 *
	 * PIPE_BUF here is an arbitrary value chosen as (some bytes) above.
	 * We expect it's large enough for such applications.
	 */
	u_long lowat = MAX(sock_loan_thresh, MCLBYTES);
	u_long hiwat = lowat + PIPE_BUF;

	if (sndcc < hiwat)
		sndcc = hiwat;
	if (sbreserve(&so->so_snd, sndcc, so) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = lowat;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
 bad2:
	sbrelease(&so->so_snd, so);
 bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
{
	struct lwp *l = curlwp; /* XXX */
	rlim_t maxcc;
	struct uidinfo *uidinfo;

	KASSERT(so->so_lock == NULL || solocked(so));
	KASSERT(sb->sb_so == so);
	KASSERT(sb_max_adj != 0);

	if (cc == 0 || cc > sb_max_adj)
		return (0);

	if (kauth_cred_geteuid(l->l_cred) == so->so_uidinfo->ui_uid)
		maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;
	else
		maxcc = RLIM_INFINITY;

	uidinfo = so->so_uidinfo;
	if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
		return 0;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.  We do not assert
 * that the socket is held locked here: see sorflush().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	KASSERT(sb->sb_so == so);

	sbflush(sb);
	(void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
	sb->sb_mbmax = 0;
}
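
/*
 * Illustrative sketch of the attach-time reservation most protocols
 * perform via soreserve() above; "sendspace" and "recvspace" stand in
 * for whatever per-protocol defaults apply:
 *
 *	error = soreserve(so, sendspace, recvspace);
 *	if (error != 0)
 *		return error;
 */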

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

#ifdef SOCKBUF_DEBUG
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link a chain of records onto a socket buffer
 */
#define	SBLINKRECORDCHAIN(sb, m0, mlast)				\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (mlast);					\
} while (/*CONSTCOND*/0)


#define	SBLINKRECORD(sb, m0)						\
    SBLINKRECORDCHAIN(sb, m0, m0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	if (m == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}
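
/*
 * Illustrative sketch of the usual pattern around sbappend(): the
 * caller checks sbspace() first and wakes the reader afterwards.
 * "len" stands for the amount of data being appended.
 *
 *	if (sbspace(&so->so_rcv) < len) {
 *		m_freem(m);
 *		return;
 *	}
 *	sbappend(&so->so_rcv, m);
 *	sorwakeup(so);
 */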

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KASSERT(solocked(sb->sb_so));
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m, *m2;
	u_long len, mbcnt;

	KASSERT(solocked(sb->sb_so));

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_nextpkt) {
		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
			len += m2->m_len;
			mbcnt += MSIZE;
			if (m2->m_flags & M_EXT)
				mbcnt += m2->m_ext.ext_size;
			if (m2->m_nextpkt != NULL)
				panic("sbcheck nextpkt");
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	KASSERT(solocked(sb->sb_so));

	if (m0 == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	KASSERT(solocked(sb->sb_so));

	if (m0 == 0)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
	struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	KASSERT(solocked(sb->sb_so));

	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claimm(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, void *), asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");
	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}
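
/*
 * Illustrative sketch of how a datagram protocol typically delivers a
 * packet with sbappendaddr(); "src" is the already-filled sender
 * address and "opts" any control mbufs (both placeholders here):
 *
 *	if (sbappendaddr(&so->so_rcv, (const struct sockaddr *)&src,
 *	    m, opts) == 0) {
 *		m_freem(m);
 *		m_freem(opts);
 *	} else
 *		sorwakeup(so);
 */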

/*
 * Helper for sbappendchainaddr: prepend a struct sockaddr* to
 * an mbuf chain.
 */
static inline struct mbuf *
m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
	const struct sockaddr *asa)
{
	struct mbuf *m;
	const int salen = asa->sa_len;

	KASSERT(solocked(sb->sb_so));

	/* only the first in each chain need be a pkthdr */
	MGETHDR(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
#ifdef notyet
	if (salen > MHLEN) {
		MEXTMALLOC(m, salen, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
#else
	KASSERT(salen <= MHLEN);
#endif
	m->m_len = salen;
	memcpy(mtod(m, void *), asa, salen);
	m->m_next = m0;
	m->m_pkthdr.len = salen + m0->m_pkthdr.len;

	return m;
}

int
sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
	struct mbuf *m0, int sbprio)
{
	int space;
	struct mbuf *m, *n, *n0, *nlast;
	int error;

	KASSERT(solocked(sb->sb_so));

	/*
	 * XXX sbprio reserved for encoding priority of this* request:
	 *  SB_PRIO_NONE --> honour normal sb limits
	 *  SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
	 *	take whole chain.  Intended for large requests
	 *	that should be delivered atomically (all, or none).
	 *  SB_PRIO_OVERDRAFT --> allow a small (2*MLEN) overflow
	 *	over normal socket limits, for messages indicating
	 *	buffer overflow in earlier normal/lower-priority messages
	 *  SB_PRIO_BESTEFFORT --> ignore limits entirely.
	 *	Intended for kernel-generated messages only.
	 *	Up to generator to avoid total mbuf resource exhaustion.
	 */
	(void)sbprio;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrchain");

	space = sbspace(sb);

#ifdef notyet
	/*
	 * Enforce SB_PRIO_* limits as described above.
	 */
#endif

	n0 = NULL;
	nlast = NULL;
	for (m = m0; m; m = m->m_nextpkt) {
		struct mbuf *np;

#ifdef MBUFTRACE
		m_claimm(m, sb->sb_mowner);
#endif

		/* Prepend sockaddr to this record (m) of input chain m0 */
		n = m_prepend_sockaddr(sb, m, asa);
		if (n == NULL) {
			error = ENOBUFS;
			goto bad;
		}

		/* Append record (asa+m) to end of new chain n0 */
		if (n0 == NULL) {
			n0 = n;
		} else {
			nlast->m_nextpkt = n;
		}
		/* Keep track of last record on new chain */
		nlast = n;

		for (np = n; np; np = np->m_next)
			sballoc(sb, np);
	}

	SBLASTRECORDCHK(sb, "sbappendaddrchain 1");

	/* Drop the entire chain of (asa+m) records onto the socket */
	SBLINKRECORDCHAIN(sb, n0, nlast);

	SBLASTRECORDCHK(sb, "sbappendaddrchain 2");

	for (m = nlast; m->m_next; m = m->m_next)
		;
	sb->sb_mbtail = m;
	SBLASTMBUFCHK(sb, "sbappendaddrchain");

	return (1);

 bad:
	/*
	 * On error, free the prepended addresses.  For consistency
	 * with sbappendaddr(), leave it to our caller to free
	 * the input record chain passed to us as m0.
	 */
	while ((n = n0) != NULL) {
		struct mbuf *np;

		/* Undo the sballoc() of this record */
		for (np = n; np; np = np->m_next)
			sbfree(sb, np);

		n0 = n->m_nextpkt;	/* iterate at next prepended address */
		MFREE(n, np);		/* free prepended address (not data) */
	}
	return 0;
}


int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	KASSERT(solocked(sb->sb_so));

	space = 0;
	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");
	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	KASSERT(solocked(sb->sb_so));

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT(solocked(sb->sb_so));
	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	KASSERT(solocked(sb->sb_so));

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	KASSERT(solocked(sb->sb_so));

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}
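
/*
 * Illustrative sketch of the acknowledgement pattern described earlier
 * for reliable protocols: when the peer acknowledges "acked" bytes, the
 * protocol drops them from the send buffer and wakes any writer.
 *
 *	sbdrop(&so->so_snd, acked);
 *	sowwakeup(so);
 */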

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(void *p, int size, int type, int level)
{
	struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE(size) > MCLBYTES) {
		printf("sbcreatecontrol: message too large %d\n", size);
		return NULL;
	}

	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	if (CMSG_SPACE(size) > MLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
	cp = mtod(m, struct cmsghdr *);
	memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}

void
solockretry(struct socket *so, kmutex_t *lock)
{

	while (lock != so->so_lock) {
		mutex_exit(lock);
		lock = so->so_lock;
		mutex_enter(lock);
	}
}

bool
solocked(struct socket *so)
{

	return mutex_owned(so->so_lock);
}

bool
solocked2(struct socket *so1, struct socket *so2)
{
	kmutex_t *lock;

	lock = so1->so_lock;
	if (lock != so2->so_lock)
		return false;
	return mutex_owned(lock);
}

/*
 * Assign a default lock to a new socket.  Called during PRU_ATTACH by
 * protocols that do not have special locking requirements.
 */
void
sosetlock(struct socket *so)
{
	kmutex_t *lock;

	if (so->so_lock == NULL) {
		lock = softnet_lock;
		so->so_lock = lock;
		mutex_obj_hold(lock);
		mutex_enter(lock);
	}

	/* In all cases, lock must be held on return from PRU_ATTACH. */
	KASSERT(solocked(so));
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	KASSERT(solocked(sb->sb_so));

	for (;;) {
		if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
			sb->sb_flags |= SB_LOCK;
			return 0;
		}
		if (wf != M_WAITOK)
			return EWOULDBLOCK;
		so = sb->sb_so;
		lock = so->so_lock;
		if ((sb->sb_flags & SB_NOINTR) != 0) {
			cv_wait(&so->so_cv, lock);
			error = 0;
		} else
			error = cv_wait_sig(&so->so_cv, lock);
		if (__predict_false(lock != so->so_lock))
			solockretry(so, lock);
		if (error != 0)
			return error;
	}
}

void
sbunlock(struct sockbuf *sb)
{
	struct socket *so;

	so = sb->sb_so;

	KASSERT(solocked(so));
	KASSERT((sb->sb_flags & SB_LOCK) != 0);

	sb->sb_flags &= ~SB_LOCK;
	cv_broadcast(&so->so_cv);
}

int
sowait(struct socket *so, bool catch, int timo)
{
	kmutex_t *lock;
	int error;

	KASSERT(solocked(so));
	KASSERT(catch || timo != 0);

	lock = so->so_lock;
	if (catch)
		error = cv_timedwait_sig(&so->so_cv, lock, timo);
	else
		error = cv_timedwait(&so->so_cv, lock, timo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
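
/*
 * Illustrative sketch of how sblock()/sbunlock() bracket a sleepable
 * operation on a socket buffer; solock()/sounlock() are the socket-lock
 * entry points defined elsewhere:
 *
 *	solock(so);
 *	error = sblock(&so->so_rcv, M_WAITOK);
 *	if (error == 0) {
 *		...consume or wait for data...
 *		sbunlock(&so->so_rcv);
 *	}
 *	sounlock(so);
 */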