/*	$NetBSD: uipc_socket2.c,v 1.102 2009/04/09 00:43:38 yamt Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.2 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.102 2009/04/09 00:43:38 yamt Exp $");

#include "opt_mbuftrace.h"
#include "opt_sb_max.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/pool.h>
#include <sys/uidinfo.h>

/*
 * Primitive routines for operating on sockets and socket buffers.
 *
 * Locking rules and assumptions:
 *
 * o socket::so_lock can change on the fly.  The low level routines used
 *   to lock sockets are aware of this.  When so_lock is acquired, the
 *   routine locking must check to see if so_lock still points to the
 *   lock that was acquired.  If so_lock has changed in the meantime, the
 *   now irrelevant lock that was acquired must be dropped and the lock
 *   operation retried.  Although not proven here, this is completely safe
 *   on a multiprocessor system, even with relaxed memory ordering, given
 *   the next two rules:
 *
 * o In order to mutate so_lock, the lock pointed to by the current value
 *   of so_lock must be held: i.e., the socket must be held locked by the
 *   changing thread.  The thread must issue membar_exit() to prevent
 *   memory accesses being reordered, and can set so_lock to the desired
 *   value.  If the lock pointed to by the new value of so_lock is not
 *   held by the changing thread, the socket must then be considered
 *   unlocked.
 *
 * o If so_lock is mutated, and the previous lock referred to by so_lock
 *   could still be visible to other threads in the system (e.g. via file
 *   descriptor or protocol-internal reference), then the old lock must
 *   remain valid until the socket and/or protocol control block has been
 *   torn down.
 *
 * o If a socket has a non-NULL so_head value (i.e. is in the process of
 *   connecting), then locking the socket must also lock the socket pointed
 *   to by so_head: their lock pointers must match.
 *
 * o If a socket has connections in progress (so_q, so_q0 not empty) then
 *   locking the socket must also lock the sockets attached to both queues.
 *   Again, their lock pointers must match.
 *
 * o Beyond the initial lock assignment in socreate(), assigning locks to
 *   sockets is the responsibility of the individual protocols / protocol
 *   domains.
 */

static pool_cache_t socket_cache;

u_long	sb_max = SB_MAX;	/* maximum socket buffer size */
static u_long sb_max_adj;	/* adjusted sb_max */
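
/*
 * Illustrative sketch (not part of the original code): the acquire/retry
 * pattern implied by the locking rules above.  The real entry points are
 * solock()/sounlock() and solockretry() later in this file; the snippet
 * here only makes the "so_lock can change on the fly" rule concrete.
 *
 *	kmutex_t *lock;
 *
 *	for (;;) {
 *		lock = so->so_lock;
 *		mutex_enter(lock);
 *		if (__predict_true(lock == so->so_lock))
 *			break;		-- still the lock covering this socket
 *		mutex_exit(lock);	-- so_lock changed underneath us; retry
 *	}
 */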

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

void
soisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

void
soisconnected(struct socket *so)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	KASSERT(head == NULL || solocked2(so, head));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && so->so_onq == &head->so_q0) {
		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
			soqremque(so, 0);
			soqinsque(head, so, 1);
			sorwakeup(head);
			cv_broadcast(&head->so_cv);
		} else {
			so->so_upcall =
			    head->so_accf->so_accept_filter->accf_callback;
			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
			so->so_rcv.sb_flags |= SB_UPCALL;
			so->so_options &= ~SO_ACCEPTFILTER;
			(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
		}
	} else {
		cv_broadcast(&so->so_cv);
		sorwakeup(so);
		sowwakeup(so);
	}
}

void
soisdisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soisdisconnected(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soinit2(void)
{

	socket_cache = pool_cache_init(sizeof(struct socket), 0, 0, 0,
	    "socket", NULL, IPL_SOFTNET, NULL, NULL, NULL);
}
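
/*
 * Illustrative sketch (not part of the original code): the active-side
 * sequence described in the comment above, as a connection-oriented
 * protocol would drive it.  The protocol steps are schematic only.
 *
 *	soisconnecting(so);	-- from the PRU_CONNECT / connect(2) path
 *	...protocol completes its handshake with the peer...
 *	soisconnected(so);	-- connect(2) sleeper on so_cv is woken
 *	...
 *	soisdisconnecting(so);	-- orderly shutdown started
 *	soisdisconnected(so);	-- connection to the peer fully severed
 *
 * A connectionless protocol may call only soisconnected() and
 * soisdisconnected(), as noted above.
 */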

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED.
 */
struct socket *
sonewconn(struct socket *head, int connstatus)
{
	struct socket *so;
	int soqueue, error;

	KASSERT(connstatus == 0 || connstatus == SS_ISCONFIRMING ||
	    connstatus == SS_ISCONNECTED);
	KASSERT(solocked(head));

	if ((head->so_options & SO_ACCEPTFILTER) != 0)
		connstatus = 0;
	soqueue = connstatus ? 1 : 0;
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return NULL;
	so = soget(false);
	if (so == NULL)
		return NULL;
	mutex_obj_hold(head->so_lock);
	so->so_lock = head->so_lock;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_nbio = head->so_nbio;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uidinfo = head->so_uidinfo;
	so->so_egid = head->so_egid;
	so->so_cpid = head->so_cpid;
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
	so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
	soqinsque(head, so, soqueue);
	error = (*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL,
	    NULL, NULL);
	KASSERT(solocked(so));
	if (error != 0) {
		(void) soqremque(so, soqueue);
		/*
		 * Remove accept filter if one is present.
		 * XXX Is this really needed?
		 */
		if (so->so_accf != NULL)
			(void)accept_filt_clear(so);
		soput(so);
		return NULL;
	}
	if (connstatus) {
		sorwakeup(head);
		cv_broadcast(&head->so_cv);
		so->so_state |= connstatus;
	}
	return so;
}
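
/*
 * Illustrative sketch (not part of the original code): a protocol's
 * input path typically calls sonewconn() with the listening socket
 * "head" locked, roughly:
 *
 *	solock(head);
 *	so = sonewconn(head, 0);	-- queued on head->so_q0 for now
 *	if (so == NULL)
 *		...drop the incoming connection request...
 *	...initialise the protocol control block attached via PRU_ATTACH,
 *	   then call soisconnected(so) once the handshake completes, which
 *	   moves the socket to head->so_q for accept(2)...
 *	sounlock(head);
 *
 * Passing SS_ISCONNECTED as connstatus instead places the new socket on
 * so_q immediately, as the code above shows.
 */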

struct socket *
soget(bool waitok)
{
	struct socket *so;

	so = pool_cache_get(socket_cache, (waitok ?
	    PR_WAITOK : PR_NOWAIT));
	if (__predict_false(so == NULL))
		return (NULL);
	memset(so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	cv_init(&so->so_cv, "socket");
	cv_init(&so->so_rcv.sb_cv, "netio");
	cv_init(&so->so_snd.sb_cv, "netio");
	selinit(&so->so_rcv.sb_sel);
	selinit(&so->so_snd.sb_sel);
	so->so_rcv.sb_so = so;
	so->so_snd.sb_so = so;
	return so;
}

void
soput(struct socket *so)
{

	KASSERT(!cv_has_waiters(&so->so_cv));
	KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
	KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
	seldestroy(&so->so_rcv.sb_sel);
	seldestroy(&so->so_snd.sb_sel);
	mutex_obj_free(so->so_lock);
	cv_destroy(&so->so_cv);
	cv_destroy(&so->so_rcv.sb_cv);
	cv_destroy(&so->so_snd.sb_cv);
	pool_cache_put(socket_cache, so);
}

void
soqinsque(struct socket *head, struct socket *so, int q)
{

	KASSERT(solocked2(head, so));

#ifdef DIAGNOSTIC
	if (so->so_onq != NULL)
		panic("soqinsque");
#endif

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

int
soqremque(struct socket *so, int q)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return (0);
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return (0);
		head->so_qlen--;
	}
	KASSERT(solocked2(so, head));
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return (1);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

void
socantrcvmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(struct sockbuf *sb)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	so = sb->sb_so;

	KASSERT(solocked(so));

	sb->sb_flags |= SB_NOTIFY;
	lock = so->so_lock;
	if ((sb->sb_flags & SB_NOINTR) != 0)
		error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
	else
		error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
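
/*
 * Illustrative sketch (not part of the original code): sbwait() pairs
 * with the wakeup routines below.  A consumer blocking for receive data
 * does, roughly:
 *
 *	solock(so);
 *	while (so->so_rcv.sb_cc == 0) {
 *		error = sbwait(&so->so_rcv);	-- sleeps on sb_cv
 *		if (error)
 *			break;			-- signal or timeout
 *	}
 *	...consume data...
 *	sounlock(so);
 *
 * while the protocol's input side appends data and calls sorwakeup(so),
 * which ends up in sowakeup() below.
 */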

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket buffer has the SB_ASYNC flag set.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb, int code)
{
	int band;

	KASSERT(solocked(so));
	KASSERT(sb->sb_so == so);

	if (code == POLL_IN)
		band = POLLIN|POLLRDNORM;
	else
		band = POLLOUT|POLLWRNORM;
	sb->sb_flags &= ~SB_NOTIFY;
	selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
	cv_broadcast(&sb->sb_cv);
	if (sb->sb_flags & SB_ASYNC)
		fownsignal(so->so_pgid, SIGIO, code, band, so);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
}

/*
 * Reset a socket's lock pointer.  Wake all threads waiting on the
 * socket's condition variables so that they can restart their waits
 * using the new lock.  The existing lock must be held.
 */
void
solockreset(struct socket *so, kmutex_t *lock)
{

	KASSERT(solocked(so));

	so->so_lock = lock;
	cv_broadcast(&so->so_snd.sb_cv);
	cv_broadcast(&so->so_rcv.sb_cv);
	cv_broadcast(&so->so_cv);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

int
sb_max_set(u_long new_sbmax)
{
	int s;

	if (new_sbmax < (16 * 1024))
		return (EINVAL);

	s = splsoftnet();
	sb_max = new_sbmax;
	sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
	splx(s);

	return (0);
}
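
/*
 * Illustrative sketch (not part of the original code): a protocol's
 * attach routine typically sizes the socket with soreserve() below.
 * The space constants here are placeholders for per-protocol tunables.
 *
 *	error = soreserve(so, protocol_sendspace, protocol_recvspace);
 *	if (error)
 *		return error;
 *
 * The reservation is checked against sb_max_adj and the per-user
 * RLIMIT_SBSIZE accounting in sbreserve(), and is released again by
 * sbrelease() when the socket is torn down.
 */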

int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{

	KASSERT(so->so_lock == NULL || solocked(so));

	/*
	 * there's at least one application (a configure script of screen)
	 * which expects a fifo to be writable even if it has "some" bytes
	 * in its buffer.
	 * so we want to make sure (hiwat - lowat) >= (some bytes).
	 *
	 * PIPE_BUF here is an arbitrary value chosen as (some bytes) above.
	 * we expect it's large enough for such applications.
	 */
	u_long lowat = MAX(sock_loan_thresh, MCLBYTES);
	u_long hiwat = lowat + PIPE_BUF;

	if (sndcc < hiwat)
		sndcc = hiwat;
	if (sbreserve(&so->so_snd, sndcc, so) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = lowat;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
 bad2:
	sbrelease(&so->so_snd, so);
 bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
{
	struct lwp *l = curlwp; /* XXX */
	rlim_t maxcc;
	struct uidinfo *uidinfo;

	KASSERT(so->so_lock == NULL || solocked(so));
	KASSERT(sb->sb_so == so);
	KASSERT(sb_max_adj != 0);

	if (cc == 0 || cc > sb_max_adj)
		return (0);

	if (kauth_cred_geteuid(l->l_cred) == so->so_uidinfo->ui_uid)
		maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;
	else
		maxcc = RLIM_INFINITY;

	uidinfo = so->so_uidinfo;
	if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
		return 0;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.  We do not assert
 * that the socket is held locked here: see sorflush().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	KASSERT(sb->sb_so == so);

	sbflush(sb);
	(void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
	sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendcontrol()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removed from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */
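
/*
 * Illustrative sketch (not part of the original code): which append
 * routine a protocol uses, in terms of the conventions above:
 *
 *	sbappendstream(&so->so_rcv, m);		-- stream data, single record
 *	sbappend(&so->so_rcv, m);		-- more data for the last record
 *	sbappendrecord(&so->so_rcv, m);		-- data beginning a new record
 *	sbappendaddr(&so->so_rcv, sa, m, ctl);	-- datagram: sender name + data
 *	sbappendcontrol(&so->so_rcv, m, ctl);	-- data with ancillary data
 *
 * The sbappendaddr()/sbappendcontrol() forms check sbspace() themselves
 * and return 0 on failure; callers of sbappend()/sbappendrecord() are
 * expected to have checked sbspace() first, as noted above.
 */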

#ifdef SOCKBUF_DEBUG
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link a chain of records onto a socket buffer
 */
#define	SBLINKRECORDCHAIN(sb, m0, mlast)				\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (mlast);					\
} while (/*CONSTCOND*/0)


#define	SBLINKRECORD(sb, m0)						\
	SBLINKRECORDCHAIN(sb, m0, m0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	if (m == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KASSERT(solocked(sb->sb_so));
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m, *m2;
	u_long len, mbcnt;

	KASSERT(solocked(sb->sb_so));

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_nextpkt) {
		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
			len += m2->m_len;
			mbcnt += MSIZE;
			if (m2->m_flags & M_EXT)
				mbcnt += m2->m_ext.ext_size;
			if (m2->m_nextpkt != NULL)
				panic("sbcheck nextpkt");
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	KASSERT(solocked(sb->sb_so));

	if (m0 == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	KASSERT(solocked(sb->sb_so));

	if (m0 == 0)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}
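
/*
 * Illustrative sketch (not part of the original code): the shape of a
 * receive buffer holding two datagram records, in terms of the fields
 * maintained by the routines in this file:
 *
 *	sb_mb ---------> [MT_SONAME]--m_next-->[MT_CONTROL]--m_next-->[data]
 *	                      |
 *	                      | m_nextpkt
 *	                      v
 *	sb_lastrecord -> [MT_SONAME]--m_next-->[data]   <-- sb_mbtail
 *
 * sb_lastrecord points at the first mbuf of the last record, and
 * sb_mbtail at the last mbuf of that record.
 */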

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
	struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	KASSERT(solocked(sb->sb_so));

	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claimm(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, void *), asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");
	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}

/*
 * Helper for sbappendaddrchain(): prepend a struct sockaddr* to
 * an mbuf chain.
 */
static inline struct mbuf *
m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
		   const struct sockaddr *asa)
{
	struct mbuf *m;
	const int salen = asa->sa_len;

	KASSERT(solocked(sb->sb_so));

	/* only the first in each chain need be a pkthdr */
	MGETHDR(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
#ifdef notyet
	if (salen > MHLEN) {
		MEXTMALLOC(m, salen, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
#else
	KASSERT(salen <= MHLEN);
#endif
	m->m_len = salen;
	memcpy(mtod(m, void *), asa, salen);
	m->m_next = m0;
	m->m_pkthdr.len = salen + m0->m_pkthdr.len;

	return m;
}

int
sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
		  struct mbuf *m0, int sbprio)
{
	int space;
	struct mbuf *m, *n, *n0, *nlast;
	int error;

	KASSERT(solocked(sb->sb_so));

	/*
	 * XXX sbprio reserved for encoding priority of this request:
	 *  SB_PRIO_NONE --> honour normal sb limits
	 *  SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
	 *	take whole chain.  Intended for large requests
	 *	that should be delivered atomically (all, or none).
	 *  SB_PRIO_OVERDRAFT --> allow a small (2*MLEN) overflow
	 *	over normal socket limits, for messages indicating
	 *	buffer overflow in earlier normal/lower-priority messages
	 *  SB_PRIO_BESTEFFORT --> ignore limits entirely.
	 *	Intended for kernel-generated messages only.
	 *	Up to generator to avoid total mbuf resource exhaustion.
	 */
	(void)sbprio;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrchain");

	space = sbspace(sb);

#ifdef notyet
	/*
	 * Enforce SB_PRIO_* limits as described above.
	 */
#endif

	n0 = NULL;
	nlast = NULL;
	for (m = m0; m; m = m->m_nextpkt) {
		struct mbuf *np;

#ifdef MBUFTRACE
		m_claimm(m, sb->sb_mowner);
#endif

		/* Prepend sockaddr to this record (m) of input chain m0 */
		n = m_prepend_sockaddr(sb, m, asa);
		if (n == NULL) {
			error = ENOBUFS;
			goto bad;
		}

		/* Append record (asa+m) to end of new chain n0 */
		if (n0 == NULL) {
			n0 = n;
		} else {
			nlast->m_nextpkt = n;
		}
		/* Keep track of last record on new chain */
		nlast = n;

		for (np = n; np; np = np->m_next)
			sballoc(sb, np);
	}

	SBLASTRECORDCHK(sb, "sbappendaddrchain 1");

	/* Drop the entire chain of (asa+m) records onto the socket */
	SBLINKRECORDCHAIN(sb, n0, nlast);

	SBLASTRECORDCHK(sb, "sbappendaddrchain 2");

	for (m = nlast; m->m_next; m = m->m_next)
		;
	sb->sb_mbtail = m;
	SBLASTMBUFCHK(sb, "sbappendaddrchain");

	return (1);

bad:
	/*
	 * On error, free the prepended addresses.  For consistency
	 * with sbappendaddr(), leave it to our caller to free
	 * the input record chain passed to us as m0.
	 */
	while ((n = n0) != NULL) {
		struct mbuf *np;

		/* Undo the sballoc() of this record */
		for (np = n; np; np = np->m_next)
			sbfree(sb, np);

		n0 = n->m_nextpkt;	/* iterate at next prepended address */
		MFREE(n, np);		/* free prepended address (not data) */
	}
	return 0;
}


int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	KASSERT(solocked(sb->sb_so));

	space = 0;
	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");
	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	KASSERT(solocked(sb->sb_so));

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT(solocked(sb->sb_so));
	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	KASSERT(solocked(sb->sb_so));

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	KASSERT(solocked(sb->sb_so));

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}
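
/*
 * Illustrative sketch (not part of the original code): a reliable
 * protocol typically uses sbdrop() on the send buffer as the peer
 * acknowledges data, roughly:
 *
 *	sbdrop(&so->so_snd, acked_bytes);	-- discard acknowledged data
 *	sowwakeup(so);				-- blocked writers may proceed
 *
 * while a datagram consumer removes an entire record at once with
 * sbdroprecord(&so->so_rcv) after copying the record out.
 */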

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(void *p, int size, int type, int level)
{
	struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE(size) > MCLBYTES) {
		printf("sbcreatecontrol: message too large %d\n", size);
		return NULL;
	}

	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	if (CMSG_SPACE(size) > MLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
	cp = mtod(m, struct cmsghdr *);
	memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}

void
solockretry(struct socket *so, kmutex_t *lock)
{

	while (lock != so->so_lock) {
		mutex_exit(lock);
		lock = so->so_lock;
		mutex_enter(lock);
	}
}

bool
solocked(struct socket *so)
{

	return mutex_owned(so->so_lock);
}

bool
solocked2(struct socket *so1, struct socket *so2)
{
	kmutex_t *lock;

	lock = so1->so_lock;
	if (lock != so2->so_lock)
		return false;
	return mutex_owned(lock);
}

/*
 * Assign a default lock to a new socket.  For PRU_ATTACH, and done by
 * protocols that do not have special locking requirements.
 */
void
sosetlock(struct socket *so)
{
	kmutex_t *lock;

	if (so->so_lock == NULL) {
		lock = softnet_lock;
		so->so_lock = lock;
		mutex_obj_hold(lock);
		mutex_enter(lock);
	}

	/* In all cases, lock must be held on return from PRU_ATTACH. */
	KASSERT(solocked(so));
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	KASSERT(solocked(sb->sb_so));

	for (;;) {
		if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
			sb->sb_flags |= SB_LOCK;
			return 0;
		}
		if (wf != M_WAITOK)
			return EWOULDBLOCK;
		so = sb->sb_so;
		lock = so->so_lock;
		if ((sb->sb_flags & SB_NOINTR) != 0) {
			cv_wait(&so->so_cv, lock);
			error = 0;
		} else
			error = cv_wait_sig(&so->so_cv, lock);
		if (__predict_false(lock != so->so_lock))
			solockretry(so, lock);
		if (error != 0)
			return error;
	}
}

void
sbunlock(struct sockbuf *sb)
{
	struct socket *so;

	so = sb->sb_so;

	KASSERT(solocked(so));
	KASSERT((sb->sb_flags & SB_LOCK) != 0);

	sb->sb_flags &= ~SB_LOCK;
	cv_broadcast(&so->so_cv);
}

int
sowait(struct socket *so, bool catch, int timo)
{
	kmutex_t *lock;
	int error;

	KASSERT(solocked(so));
	KASSERT(catch || timo != 0);

	lock = so->so_lock;
	if (catch)
		error = cv_timedwait_sig(&so->so_cv, lock, timo);
	else
		error = cv_timedwait(&so->so_cv, lock, timo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
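
/*
 * Illustrative sketch (not part of the original code): sblock() and
 * sbunlock() bracket operations that need a stable view of a socket
 * buffer (e.g. the soreceive()/sosend() paths), roughly:
 *
 *	solock(so);
 *	error = sblock(&so->so_rcv, M_WAITOK);	-- may sleep on so_cv
 *	if (error == 0) {
 *		...manipulate so->so_rcv...
 *		sbunlock(&so->so_rcv);		-- wakes other sblock() waiters
 *	}
 *	sounlock(so);
 *
 * The socket lock itself may be dropped and re-taken while SB_LOCK is
 * held (e.g. inside sbwait()); SB_LOCK is what keeps other readers or
 * writers out of the buffer in the meantime.
 */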