/*	$NetBSD: uipc_socket2.c,v 1.108 2011/04/24 18:46:23 rmind Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.2 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.108 2011/04/24 18:46:23 rmind Exp $");

#include "opt_mbuftrace.h"
#include "opt_sb_max.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/pool.h>
#include <sys/uidinfo.h>

/*
 * Primitive routines for operating on sockets and socket buffers.
 *
 * Locking rules and assumptions:
 *
 * o socket::so_lock can change on the fly.  The low level routines used
 *   to lock sockets are aware of this.  When so_lock is acquired, the
 *   routine locking must check to see if so_lock still points to the
 *   lock that was acquired.  If so_lock has changed in the meantime, the
 *   now irrelevant lock that was acquired must be dropped and the lock
 *   operation retried.  Although not proven here, this is completely safe
 *   on a multiprocessor system, even with relaxed memory ordering, given
 *   the next two rules:
 *
 * o In order to mutate so_lock, the lock pointed to by the current value
 *   of so_lock must be held: i.e., the socket must be held locked by the
 *   changing thread.  The thread must issue membar_exit() to prevent
 *   memory accesses being reordered, and can then set so_lock to the
 *   desired value.  If the lock pointed to by the new value of so_lock is
 *   not held by the changing thread, the socket must then be considered
 *   unlocked.
 *
 * o If so_lock is mutated, and the previous lock referred to by so_lock
 *   could still be visible to other threads in the system (e.g. via file
 *   descriptor or protocol-internal reference), then the old lock must
 *   remain valid until the socket and/or protocol control block has been
 *   torn down.
 *
 * o If a socket has a non-NULL so_head value (i.e. is in the process of
 *   connecting), then locking the socket must also lock the socket pointed
 *   to by so_head: their lock pointers must match.
 *
 * o If a socket has connections in progress (so_q, so_q0 not empty) then
 *   locking the socket must also lock the sockets attached to both queues.
 *   Again, their lock pointers must match.
 *
 * o Beyond the initial lock assignment in socreate(), assigning locks to
 *   sockets is the responsibility of the individual protocols / protocol
 *   domains.
 */

static pool_cache_t	socket_cache;

u_long		sb_max = SB_MAX;	/* maximum socket buffer size */
static u_long	sb_max_adj;		/* adjusted sb_max */
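
/*
 * A minimal sketch (illustrative only, not compiled) of the
 * acquire-and-recheck pattern that the locking rules above demand of
 * the low level locking routines; solockretry() below implements the
 * retry step for real:
 *
 *	for (;;) {
 *		lock = so->so_lock;
 *		mutex_enter(lock);
 *		if (lock == so->so_lock)
 *			break;			(still the right lock: done)
 *		mutex_exit(lock);		(so_lock changed; retry)
 *	}
 */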

/*
 * Procedures to manipulate state flags of a socket and do appropriate
 * wakeups.  The normal sequence from the active (originating) side is
 * that soisconnecting() is called during processing of a connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of a disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

void
soisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

void
soisconnected(struct socket *so)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	KASSERT(head == NULL || solocked2(so, head));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && so->so_onq == &head->so_q0) {
		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
			soqremque(so, 0);
			soqinsque(head, so, 1);
			sorwakeup(head);
			cv_broadcast(&head->so_cv);
		} else {
			so->so_upcall =
			    head->so_accf->so_accept_filter->accf_callback;
			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
			so->so_rcv.sb_flags |= SB_UPCALL;
			so->so_options &= ~SO_ACCEPTFILTER;
			(*so->so_upcall)(so, so->so_upcallarg,
			    POLLIN|POLLRDNORM, M_DONTWAIT);
		}
	} else {
		cv_broadcast(&so->so_cv);
		sorwakeup(so);
		sowwakeup(so);
	}
}

void
soisdisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soisdisconnected(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soinit2(void)
{

	socket_cache = pool_cache_init(sizeof(struct socket), 0, 0, 0,
	    "socket", NULL, IPL_SOFTNET, NULL, NULL, NULL);
}
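
/*
 * A minimal sketch (illustrative only) of the sequence described in the
 * block comment above, for an active open by a connection-oriented
 * protocol:
 *
 *	In the protocol's connect handler:
 *		soisconnecting(so);
 *	Later, when the handshake with the peer completes:
 *		soisconnected(so);
 *	On shutdown:
 *		soisdisconnecting(so);
 *	And once the connection is fully torn down:
 *		soisdisconnected(so);
 */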

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn() is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED.
 */
struct socket *
sonewconn(struct socket *head, int connstatus)
{
	struct socket *so;
	int soqueue, error;

	KASSERT(connstatus == 0 || connstatus == SS_ISCONFIRMING ||
	    connstatus == SS_ISCONNECTED);
	KASSERT(solocked(head));

	if ((head->so_options & SO_ACCEPTFILTER) != 0)
		connstatus = 0;
	soqueue = connstatus ? 1 : 0;
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return NULL;
	so = soget(false);
	if (so == NULL)
		return NULL;
	mutex_obj_hold(head->so_lock);
	so->so_lock = head->so_lock;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_nbio = head->so_nbio;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uidinfo = head->so_uidinfo;
	so->so_cpid = head->so_cpid;
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) != 0)
		goto out;
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & (SB_AUTOSIZE | SB_ASYNC);
	so->so_snd.sb_flags |= head->so_snd.sb_flags & (SB_AUTOSIZE | SB_ASYNC);
	soqinsque(head, so, soqueue);
	error = (*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL,
	    NULL, NULL);
	KASSERT(solocked(so));
	if (error != 0) {
		(void) soqremque(so, soqueue);
 out:
		/*
		 * Remove accept filter if one is present.
		 * XXX Is this really needed?
		 */
		if (so->so_accf != NULL)
			(void)accept_filt_clear(so);
		soput(so);
		return NULL;
	}
	if (connstatus) {
		sorwakeup(head);
		cv_broadcast(&head->so_cv);
		so->so_state |= connstatus;
	}
	return so;
}
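
/*
 * Worked example of the queue limit check in sonewconn() above: for a
 * listen(2) backlog of 4 (so_qlimit == 4), new connections are refused
 * once so_qlen + so_q0len exceeds 3 * 4 / 2 == 6, i.e. the effective
 * backlog is 1.5 times what the application asked for.
 */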

struct socket *
soget(bool waitok)
{
	struct socket *so;

	so = pool_cache_get(socket_cache, (waitok ? PR_WAITOK : PR_NOWAIT));
	if (__predict_false(so == NULL))
		return (NULL);
	memset(so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	cv_init(&so->so_cv, "socket");
	cv_init(&so->so_rcv.sb_cv, "netio");
	cv_init(&so->so_snd.sb_cv, "netio");
	selinit(&so->so_rcv.sb_sel);
	selinit(&so->so_snd.sb_sel);
	so->so_rcv.sb_so = so;
	so->so_snd.sb_so = so;
	return so;
}

void
soput(struct socket *so)
{

	KASSERT(!cv_has_waiters(&so->so_cv));
	KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
	KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
	seldestroy(&so->so_rcv.sb_sel);
	seldestroy(&so->so_snd.sb_sel);
	mutex_obj_free(so->so_lock);
	cv_destroy(&so->so_cv);
	cv_destroy(&so->so_rcv.sb_cv);
	cv_destroy(&so->so_snd.sb_cv);
	pool_cache_put(socket_cache, so);
}

void
soqinsque(struct socket *head, struct socket *so, int q)
{

	KASSERT(solocked2(head, so));

#ifdef DIAGNOSTIC
	if (so->so_onq != NULL)
		panic("soqinsque");
#endif

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

int
soqremque(struct socket *so, int q)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return (0);
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return (0);
		head->so_qlen--;
	}
	KASSERT(solocked2(so, head));
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return (1);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it is normally applied to a socket by the protocol code
 * (in the case of PRU_SHUTDOWN) when the user informs the system that
 * no more data is to be sent.  Socantrcvmore indicates that no more
 * data will be received, and will normally be applied to the socket
 * by a protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

void
socantrcvmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(struct sockbuf *sb)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	so = sb->sb_so;

	KASSERT(solocked(so));

	sb->sb_flags |= SB_NOTIFY;
	lock = so->so_lock;
	if ((sb->sb_flags & SB_NOINTR) != 0)
		error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
	else
		error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
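
/*
 * A minimal sketch (illustrative only) of the canonical consumer of
 * sbwait(): a receive path that sleeps until data arrives, in the
 * style of soreceive() in uipc_socket.c:
 *
 *	while (so->so_rcv.sb_cc == 0) {
 *		if (so->so_state & SS_CANTRCVMORE)
 *			break;
 *		if ((error = sbwait(&so->so_rcv)) != 0)
 *			return error;	(interrupted or timed out)
 *	}
 */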

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket buffer has the SB_ASYNC flag set.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb, int code)
{
	int band;

	KASSERT(solocked(so));
	KASSERT(sb->sb_so == so);

	if (code == POLL_IN)
		band = POLLIN|POLLRDNORM;
	else
		band = POLLOUT|POLLWRNORM;
	sb->sb_flags &= ~SB_NOTIFY;
	selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
	cv_broadcast(&sb->sb_cv);
	if (sb->sb_flags & SB_ASYNC)
		fownsignal(so->so_pgid, SIGIO, code, band, so);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, band, M_DONTWAIT);
}

/*
 * Reset a socket's lock pointer.  Wake all threads waiting on the
 * socket's condition variables so that they can restart their waits
 * using the new lock.  The existing lock must be held.
 */
void
solockreset(struct socket *so, kmutex_t *lock)
{

	KASSERT(solocked(so));

	so->so_lock = lock;
	cv_broadcast(&so->so_snd.sb_cv);
	cv_broadcast(&so->so_rcv.sb_cv);
	cv_broadcast(&so->so_cv);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The
 * upper level routine soreceive() expects the following conventions to
 * be observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space for the socket, by calling sbreserve().  This should
 * commit some of the available buffer space in the system buffer pool
 * for the socket (currently, it does nothing but enforce limits).  The
 * space should be released by calling sbrelease() when the socket is
 * destroyed.
 */

int
sb_max_set(u_long new_sbmax)
{
	int s;

	if (new_sbmax < (16 * 1024))
		return (EINVAL);

	s = splsoftnet();
	sb_max = new_sbmax;
	sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
	splx(s);

	return (0);
}
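
/*
 * Worked example of the scaling in sb_max_set() above, assuming a port
 * where MSIZE is 256 and MCLBYTES is 2048 (both values are port
 * dependent): with the default sb_max of 256 kB (SB_MAX),
 * sb_max_adj = 262144 * 2048 / 2304 = 233016, i.e. roughly 8/9 of the
 * nominal limit.  The discount accounts for per-mbuf header overhead,
 * so that a buffer made entirely of clusters can still be filled to
 * its nominal byte limit without first hitting the mbuf count limit.
 */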

int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{

	KASSERT(so->so_lock == NULL || solocked(so));

	/*
	 * There is at least one application (the configure script of
	 * screen) which expects that a fifo is writable even if it
	 * already holds "some" bytes in its buffer, so we want to make
	 * sure that (hiwat - lowat) >= (some bytes).
	 *
	 * PIPE_BUF here is an arbitrary value chosen as (some bytes)
	 * above.  We expect it to be large enough for such applications.
	 */
	u_long lowat = MAX(sock_loan_thresh, MCLBYTES);
	u_long hiwat = lowat + PIPE_BUF;

	if (sndcc < hiwat)
		sndcc = hiwat;
	if (sbreserve(&so->so_snd, sndcc, so) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = lowat;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
 bad2:
	sbrelease(&so->so_snd, so);
 bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
{
	struct lwp *l = curlwp; /* XXX */
	rlim_t maxcc;
	struct uidinfo *uidinfo;

	KASSERT(so->so_lock == NULL || solocked(so));
	KASSERT(sb->sb_so == so);
	KASSERT(sb_max_adj != 0);

	if (cc == 0 || cc > sb_max_adj)
		return (0);

	maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;

	uidinfo = so->so_uidinfo;
	if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
		return 0;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.  We do not
 * assert that the socket is held locked here: see sorflush().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	KASSERT(sb->sb_so == so);

	sbflush(sb);
	(void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
	sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */
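
/*
 * A minimal sketch (illustrative only) of the space-check convention
 * described above, as a stream protocol's input path might follow it:
 *
 *	if (sbspace(&so->so_rcv) < m->m_pkthdr.len) {
 *		... no room: drop, or queue the data elsewhere ...
 *	} else {
 *		sbappend(&so->so_rcv, m);
 *		sorwakeup(so);
 *	}
 */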

#ifdef SOCKBUF_DEBUG
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link a chain of records onto a socket buffer
 */
#define	SBLINKRECORDCHAIN(sb, m0, mlast)				\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (mlast);					\
} while (/*CONSTCOND*/0)


#define	SBLINKRECORD(sb, m0)						\
	SBLINKRECORDCHAIN(sb, m0, m0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	if (m == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */

void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KASSERT(solocked(sb->sb_so));
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m, *m2;
	u_long len, mbcnt;

	KASSERT(solocked(sb->sb_so));

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_nextpkt) {
		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
			len += m2->m_len;
			mbcnt += MSIZE;
			if (m2->m_flags & M_EXT)
				mbcnt += m2->m_ext.ext_size;
			if (m2->m_nextpkt != NULL)
				panic("sbcheck nextpkt");
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	KASSERT(solocked(sb->sb_so));

	if (m0 == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	KASSERT(solocked(sb->sb_so));

	if (m0 == 0)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
	struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	KASSERT(solocked(sb->sb_so));

	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claimm(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, void *), asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");
	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}

/*
 * Helper for sbappendaddrchain(): prepend a struct sockaddr* to
 * an mbuf chain.
 */
static inline struct mbuf *
m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
	const struct sockaddr *asa)
{
	struct mbuf *m;
	const int salen = asa->sa_len;

	KASSERT(solocked(sb->sb_so));

	/* only the first in each chain needs to be a pkthdr */
	MGETHDR(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
#ifdef notyet
	if (salen > MHLEN) {
		MEXTMALLOC(m, salen, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
#else
	KASSERT(salen <= MHLEN);
#endif
	m->m_len = salen;
	memcpy(mtod(m, void *), asa, salen);
	m->m_next = m0;
	m->m_pkthdr.len = salen + m0->m_pkthdr.len;

	return m;
}
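
/*
 * A minimal sketch (illustrative only) of how a datagram protocol's
 * input path typically uses sbappendaddr() above; "src" stands for an
 * already-constructed sender address and "opts" for any control data,
 * both hypothetical names:
 *
 *	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)&src, m, opts))
 *		sorwakeup(so);
 *	else {
 *		m_freem(m);		(no space: drop the datagram)
 *		m_freem(opts);
 *	}
 */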

int
sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, int sbprio)
{
	int space;
	struct mbuf *m, *n, *n0, *nlast;
	int error;

	KASSERT(solocked(sb->sb_so));

	/*
	 * XXX sbprio is reserved for encoding the priority of this request:
	 *  SB_PRIO_NONE --> honour normal sb limits
	 *  SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
	 *	take whole chain.  Intended for large requests
	 *	that should be delivered atomically (all, or none).
	 *  SB_PRIO_OVERDRAFT --> allow a small (2*MLEN) overflow
	 *	over normal socket limits, for messages indicating
	 *	buffer overflow in earlier normal/lower-priority messages.
	 *  SB_PRIO_BESTEFFORT --> ignore limits entirely.
	 *	Intended for kernel-generated messages only.
	 *	Up to generator to avoid total mbuf resource exhaustion.
	 */
	(void)sbprio;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrchain");

	space = sbspace(sb);

#ifdef notyet
	/*
	 * Enforce SB_PRIO_* limits as described above.
	 */
#endif

	n0 = NULL;
	nlast = NULL;
	for (m = m0; m; m = m->m_nextpkt) {
		struct mbuf *np;

#ifdef MBUFTRACE
		m_claimm(m, sb->sb_mowner);
#endif

		/* Prepend sockaddr to this record (m) of input chain m0 */
		n = m_prepend_sockaddr(sb, m, asa);
		if (n == NULL) {
			error = ENOBUFS;
			goto bad;
		}

		/* Append record (asa+m) to end of new chain n0 */
		if (n0 == NULL) {
			n0 = n;
		} else {
			nlast->m_nextpkt = n;
		}
		/* Keep track of last record on new chain */
		nlast = n;

		for (np = n; np; np = np->m_next)
			sballoc(sb, np);
	}

	SBLASTRECORDCHK(sb, "sbappendaddrchain 1");

	/* Drop the entire chain of (asa+m) records onto the socket */
	SBLINKRECORDCHAIN(sb, n0, nlast);

	SBLASTRECORDCHK(sb, "sbappendaddrchain 2");

	for (m = nlast; m->m_next; m = m->m_next)
		;
	sb->sb_mbtail = m;
	SBLASTMBUFCHK(sb, "sbappendaddrchain");

	return (1);

 bad:
	/*
	 * On error, free the prepended addresses.  For consistency
	 * with sbappendaddr(), leave it to our caller to free
	 * the input record chain passed to us as m0.
	 */
	while ((n = n0) != NULL) {
		struct mbuf *np;

		/* Undo the sballoc() of this record */
		for (np = n; np; np = np->m_next)
			sbfree(sb, np);

		n0 = n->m_nextpkt;	/* iterate at next prepended address */
		MFREE(n, np);		/* free prepended address (not data) */
	}
	return 0;
}


int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	KASSERT(solocked(sb->sb_so));

	space = 0;
	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;		/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");
	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}
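
/*
 * A minimal sketch (illustrative only) of an sbappendcontrol() caller:
 * roughly how a protocol could deliver ancillary data (e.g. SCM_RIGHTS)
 * together with a data record, leaving the caller to free both chains
 * on failure:
 *
 *	if (sbappendcontrol(&so->so_rcv, m, control))
 *		sorwakeup(so);
 *	else {
 *		m_freem(control);	(no space: drop both chains)
 *		m_freem(m);
 *	}
 */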

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	KASSERT(solocked(sb->sb_so));

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		    (((o = m->m_next) || (o = n)) &&
		    o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT(solocked(sb->sb_so));
	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	KASSERT(solocked(sb->sb_so));

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	KASSERT(solocked(sb->sb_so));

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}
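
/*
 * A minimal sketch (illustrative only) of how a reliable protocol
 * retires acknowledged data with sbdrop(), as described in the block
 * comment before the append routines; "acked" is a hypothetical name
 * for the number of newly acknowledged bytes:
 *
 *	sbdrop(&so->so_snd, acked);
 *	sowwakeup(so);		(writers may now have space again)
 */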

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(void *p, int size, int type, int level)
{
	struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE(size) > MCLBYTES) {
		printf("sbcreatecontrol: message too large %d\n", size);
		return NULL;
	}

	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	if (CMSG_SPACE(size) > MLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
	cp = mtod(m, struct cmsghdr *);
	memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}

void
solockretry(struct socket *so, kmutex_t *lock)
{

	while (lock != so->so_lock) {
		mutex_exit(lock);
		lock = so->so_lock;
		mutex_enter(lock);
	}
}

bool
solocked(struct socket *so)
{

	return mutex_owned(so->so_lock);
}

bool
solocked2(struct socket *so1, struct socket *so2)
{
	kmutex_t *lock;

	lock = so1->so_lock;
	if (lock != so2->so_lock)
		return false;
	return mutex_owned(lock);
}

/*
 * Assign a default lock to a new socket.  For use during PRU_ATTACH by
 * protocols that do not have special locking requirements.
 */
void
sosetlock(struct socket *so)
{
	kmutex_t *lock;

	if (so->so_lock == NULL) {
		lock = softnet_lock;
		so->so_lock = lock;
		mutex_obj_hold(lock);
		mutex_enter(lock);
	}

	/* In all cases, lock must be held on return from PRU_ATTACH. */
	KASSERT(solocked(so));
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	KASSERT(solocked(sb->sb_so));

	for (;;) {
		if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
			sb->sb_flags |= SB_LOCK;
			return 0;
		}
		if (wf != M_WAITOK)
			return EWOULDBLOCK;
		so = sb->sb_so;
		lock = so->so_lock;
		if ((sb->sb_flags & SB_NOINTR) != 0) {
			cv_wait(&so->so_cv, lock);
			error = 0;
		} else
			error = cv_wait_sig(&so->so_cv, lock);
		if (__predict_false(lock != so->so_lock))
			solockretry(so, lock);
		if (error != 0)
			return error;
	}
}

void
sbunlock(struct sockbuf *sb)
{
	struct socket *so;

	so = sb->sb_so;

	KASSERT(solocked(so));
	KASSERT((sb->sb_flags & SB_LOCK) != 0);

	sb->sb_flags &= ~SB_LOCK;
	cv_broadcast(&so->so_cv);
}

int
sowait(struct socket *so, bool catch, int timo)
{
	kmutex_t *lock;
	int error;

	KASSERT(solocked(so));
	KASSERT(catch || timo != 0);

	lock = so->so_lock;
	if (catch)
		error = cv_timedwait_sig(&so->so_cv, lock, timo);
	else
		error = cv_timedwait(&so->so_cv, lock, timo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
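
/*
 * A minimal sketch (illustrative only) of the sblock()/sbunlock()
 * protocol above, as a long-running operation such as soreceive()
 * would use it to keep other consumers out of the sockbuf while the
 * socket lock may be dropped and retaken during sleeps:
 *
 *	solock(so);
 *	if ((error = sblock(&so->so_rcv, M_WAITOK)) != 0) {
 *		sounlock(so);
 *		return error;
 *	}
 *	... consume data; may sleep via sbwait() ...
 *	sbunlock(&so->so_rcv);
 *	sounlock(so);
 */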