/*	$NetBSD: uipc_socket2.c,v 1.91 2008/04/24 11:38:36 ad Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.2 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.91 2008/04/24 11:38:36 ad Exp $");

#include "opt_mbuftrace.h"
#include "opt_sb_max.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/pool.h>

/*
 * Primitive routines for operating on sockets and socket buffers.
 *
 * Locking rules and assumptions:
 *
 * o socket::so_lock can change on the fly.  The low level routines used
 *   to lock sockets are aware of this.  When so_lock is acquired, the
 *   locking routine must check to see if so_lock still points to the
 *   lock that was acquired.  If so_lock has changed in the meantime, the
 *   now irrelevant lock that was acquired must be dropped and the lock
 *   operation retried.  Although not proven here, this is completely safe
 *   on a multiprocessor system, even with relaxed memory ordering, given
 *   the next two rules:
 *
 * o In order to mutate so_lock, the lock pointed to by the current value
 *   of so_lock must be held: i.e., the socket must be held locked by the
 *   changing thread.  The thread must issue membar_exit() to prevent
 *   memory accesses being reordered, and can then set so_lock to the
 *   desired value.  If the lock pointed to by the new value of so_lock is
 *   not held by the changing thread, the socket must then be considered
 *   unlocked.
 *
 * o If so_lock is mutated, and the previous lock referred to by so_lock
 *   could still be visible to other threads in the system (e.g. via file
 *   descriptor or protocol-internal reference), then the old lock must
 *   remain valid until the socket and/or protocol control block has been
 *   torn down.
 *
 * o If a socket has a non-NULL so_head value (i.e. is in the process of
 *   connecting), then locking the socket must also lock the socket pointed
 *   to by so_head: their lock pointers must match.
 *
 * o If a socket has connections in progress (so_q, so_q0 not empty) then
 *   locking the socket must also lock the sockets attached to both queues.
 *   Again, their lock pointers must match.
 *
 * o Beyond the initial lock assignment in socreate(), assigning locks to
 *   sockets is the responsibility of the individual protocols / protocol
 *   domains.
 */

static POOL_INIT(socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL,
    IPL_SOFTNET);

u_long	sb_max = SB_MAX;	/* maximum socket buffer size */
static u_long sb_max_adj;	/* adjusted sb_max */
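
/*
 * Illustrative sketch (not part of the original file): the caller-side
 * retry pattern implied by the first locking rule above.  The real
 * low-level locking routines live elsewhere; example_solock() is a
 * hypothetical name used only for illustration.
 */
#ifdef notyet
static void
example_solock(struct socket *so)
{
	kmutex_t *lock;

	for (;;) {
		lock = so->so_lock;	/* sample the current lock pointer */
		mutex_enter(lock);
		if (__predict_true(lock == so->so_lock))
			break;		/* so_lock unchanged: we hold it */
		mutex_exit(lock);	/* now-irrelevant lock: retry */
	}
}
#endif	/* notyet */
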
/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.
 *
 * Normal sequence from the active (originating) side is that
 * soisconnecting() is called during processing of a connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of a disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with two queues of sockets:
 * so_q0 for connections in progress and so_q for connections already
 * made and awaiting user acceptance.  As a protocol is preparing incoming
 * connections, it creates a socket structure queued on so_q0 by calling
 * sonewconn().  When the connection is established, soisconnected() is
 * called, and transfers the socket structure to so_q, making it available
 * to accept().
 *
 * If a socket is closed with sockets on either so_q0 or so_q, these
 * sockets are dropped.
 *
 * If higher level protocols are implemented in the kernel, the wakeups
 * done here will sometimes cause software-interrupt process scheduling.
 */

void
soisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

void
soisconnected(struct socket *so)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	KASSERT(head == NULL || solocked2(so, head));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && soqremque(so, 0)) {
		soqinsque(head, so, 1);
		sorwakeup(head);
		cv_broadcast(&head->so_cv);
	} else {
		cv_broadcast(&so->so_cv);
		sorwakeup(so);
		sowwakeup(so);
	}
}

void
soisdisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}

void
soisdisconnected(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}
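
/*
 * Illustrative sketch (not part of the original file): how a protocol
 * might drive the state transitions described above.  A hypothetical
 * handshake-completion handler; a connectionless protocol would call
 * soisconnected() directly, with no soisconnecting() phase.
 */
#ifdef notyet
static void
example_handshake_done(struct socket *so, bool success)
{

	KASSERT(solocked(so));
	if (success)
		soisconnected(so);	/* wakes connect()/accept() waiters */
	else
		soisdisconnected(so);	/* connection attempt failed */
}
#endif	/* notyet */
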
/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn() is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * connstatus may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED.
 */
struct socket *
sonewconn(struct socket *head, int connstatus)
{
	struct socket *so;
	int soqueue, error;

	KASSERT(solocked(head));

	soqueue = connstatus ? 1 : 0;
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = soget(false);
	if (so == NULL)
		return (NULL);
	mutex_obj_hold(head->so_lock);
	so->so_lock = head->so_lock;
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_nbio = head->so_nbio;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uidinfo = head->so_uidinfo;
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
	so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
	soqinsque(head, so, soqueue);
	error = (*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL,
	    NULL, NULL);
	KASSERT(solocked(so));
	if (error != 0) {
		(void) soqremque(so, soqueue);
		soput(so);
		return (NULL);
	}
	if (connstatus) {
		sorwakeup(head);
		cv_broadcast(&head->so_cv);
		so->so_state |= connstatus;
	}
	return (so);
}

struct socket *
soget(bool waitok)
{
	struct socket *so;

	so = pool_get(&socket_pool, (waitok ? PR_WAITOK : PR_NOWAIT));
	if (__predict_false(so == NULL))
		return (NULL);
	memset(so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	cv_init(&so->so_cv, "socket");
	cv_init(&so->so_rcv.sb_cv, "netio");
	cv_init(&so->so_snd.sb_cv, "netio");
	selinit(&so->so_rcv.sb_sel);
	selinit(&so->so_snd.sb_sel);
	so->so_rcv.sb_so = so;
	so->so_snd.sb_so = so;
	return so;
}

void
soput(struct socket *so)
{

	KASSERT(!cv_has_waiters(&so->so_cv));
	KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
	KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
	seldestroy(&so->so_rcv.sb_sel);
	seldestroy(&so->so_snd.sb_sel);
	mutex_obj_free(so->so_lock);
	cv_destroy(&so->so_cv);
	cv_destroy(&so->so_rcv.sb_cv);
	cv_destroy(&so->so_snd.sb_cv);
	pool_put(&socket_pool, so);
}

void
soqinsque(struct socket *head, struct socket *so, int q)
{

	KASSERT(solocked2(head, so));

#ifdef DIAGNOSTIC
	if (so->so_onq != NULL)
		panic("soqinsque");
#endif

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

int
soqremque(struct socket *so, int q)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return (0);
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return (0);
		head->so_qlen--;
	}
	KASSERT(solocked2(so, head));
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return (1);
}
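
/*
 * Illustrative sketch (not part of the original file): the passive-side
 * sequence described above, as an input path handling a connection
 * request on a listening socket might use it.  Names are hypothetical.
 */
#ifdef notyet
static struct socket *
example_incoming_request(struct socket *head)
{
	struct socket *so;

	KASSERT(solocked(head));
	/* connstatus 0: queue on so_q0 until the handshake completes. */
	so = sonewconn(head, 0);
	if (so == NULL)
		return NULL;		/* listen queue full, or no memory */
	/* ... protocol handshake; once it completes: */
	soisconnected(so);		/* moves so from so_q0 to so_q */
	return so;
}
#endif	/* notyet */
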
/*
 * socantsendmore() indicates that no more data will be sent on the
 * socket; it is normally applied to a socket by the protocol code when
 * the user informs the system that no more data is to be sent (the
 * PRU_SHUTDOWN case).  socantrcvmore() indicates that no more data will
 * be received, and will normally be applied to the socket by a protocol
 * when it detects that the peer will send no more data.  Data queued
 * for reading in the socket may yet be read.
 */

void
socantsendmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

void
socantrcvmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(struct sockbuf *sb)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	so = sb->sb_so;

	KASSERT(solocked(so));

	sb->sb_flags |= SB_NOTIFY;
	lock = so->so_lock;
	if ((sb->sb_flags & SB_NOINTR) != 0)
		error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
	else
		error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket buffer has the SB_ASYNC flag set.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb, int code)
{
	int band;

	KASSERT(solocked(so));
	KASSERT(sb->sb_so == so);

	if (code == POLL_IN)
		band = POLLIN|POLLRDNORM;
	else
		band = POLLOUT|POLLWRNORM;
	sb->sb_flags &= ~SB_NOTIFY;
	selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
	cv_broadcast(&sb->sb_cv);
	if (sb->sb_flags & SB_ASYNC)
		fownsignal(so->so_pgid, SIGIO, code, band, so);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
}
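
/*
 * Illustrative sketch (not part of the original file): the pairing of
 * sbwait() with sorwakeup()/sowwakeup().  A simplified receive-side
 * wait loop; the real one is soreceive() in uipc_socket.c.
 */
#ifdef notyet
static int
example_wait_for_data(struct socket *so)
{
	int error;

	KASSERT(solocked(so));
	while (so->so_rcv.sb_cc == 0) {
		if (so->so_state & SS_CANTRCVMORE)
			return 0;	/* peer sent EOF: nothing to wait for */
		error = sbwait(&so->so_rcv);	/* sleeps; so_lock may change */
		if (error != 0)
			return error;	/* e.g. EINTR */
	}
	return 0;
}
#endif	/* notyet */
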
/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The
 * upper level routine soreceive() expects the following conventions to
 * be observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space for the socket, by calling sbreserve().  This should
 * commit some of the available buffer space in the system buffer pool
 * for the socket (currently, it does nothing but enforce limits).  The
 * space should be released by calling sbrelease() when the socket is
 * destroyed.
 */

int
sb_max_set(u_long new_sbmax)
{
	int s;

	if (new_sbmax < (16 * 1024))
		return (EINVAL);

	s = splsoftnet();
	sb_max = new_sbmax;
	sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
	splx(s);

	return (0);
}

int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{

	KASSERT(so->so_lock == NULL || solocked(so));

	/*
	 * There's at least one application (the configure script of
	 * screen) which expects that a fifo is writable even if it
	 * has "some" bytes in its buffer, so we want to make sure
	 * that (hiwat - lowat) >= (some bytes).
	 *
	 * PIPE_BUF here is an arbitrary value chosen as (some bytes)
	 * above.  We expect it's large enough for such applications.
	 */
	u_long lowat = MAX(sock_loan_thresh, MCLBYTES);
	u_long hiwat = lowat + PIPE_BUF;

	if (sndcc < hiwat)
		sndcc = hiwat;
	if (sbreserve(&so->so_snd, sndcc, so) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = lowat;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
 bad2:
	sbrelease(&so->so_snd, so);
 bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
{
	struct lwp *l = curlwp; /* XXX */
	rlim_t maxcc;
	struct uidinfo *uidinfo;

	KASSERT(so->so_lock == NULL || solocked(so));
	KASSERT(sb->sb_so == so);
	KASSERT(sb_max_adj != 0);

	if (cc == 0 || cc > sb_max_adj)
		return (0);
	if (so) {
		if (kauth_cred_geteuid(l->l_cred) == so->so_uidinfo->ui_uid)
			maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;
		else
			maxcc = RLIM_INFINITY;
		uidinfo = so->so_uidinfo;
	} else {
		uidinfo = uid_find(0);	/* XXX: nothing better */
		maxcc = RLIM_INFINITY;
	}
	if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
		return 0;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.  We do not
 * assert that the socket is held locked here: see sorflush().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	KASSERT(sb->sb_so == so);

	sbflush(sb);
	(void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
	sb->sb_mbmax = 0;
}
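
/*
 * Illustrative note (not part of the original file): sb_max_adj scales
 * sb_max by the best-case ratio of payload to allocated storage,
 * MCLBYTES / (MSIZE + MCLBYTES).  For example, with MSIZE == 256 and
 * MCLBYTES == 2048 (machine-dependent values), sb_max == 256KB gives
 * sb_max_adj == 262144 * 2048 / 2304, roughly 227KB of reservable
 * data.  A hypothetical attach path would reserve space within that
 * limit:
 */
#ifdef notyet
static int
example_attach_reserve(struct socket *so)
{

	/* 8KB each way; sbreserve() rejects requests above sb_max_adj. */
	return soreserve(so, 8192, 8192);
}
#endif	/* notyet */
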
/*
 * Routines to add and remove data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendcontrol()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer, and then
 * removed from the socket buffer with sbdrop() or sbdroprecord() when
 * the data is acknowledged by the peer.
 */

#ifdef SOCKBUF_DEBUG
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link a chain of records onto a socket buffer
 */
#define	SBLINKRECORDCHAIN(sb, m0, mlast)				\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (mlast);					\
} while (/*CONSTCOND*/0)


#define	SBLINKRECORD(sb, m0)						\
    SBLINKRECORDCHAIN(sb, m0, m0)
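
/*
 * Illustrative sketch (not part of the original file): the
 * sbspace()-then-sbappend() pattern described above, as a protocol's
 * input path might use it.  Names are hypothetical.
 */
#ifdef notyet
static int
example_deliver(struct socket *so, struct mbuf *m)
{
	struct sockbuf *sb = &so->so_rcv;

	KASSERT(solocked(so));
	if (sbspace(sb) < m->m_pkthdr.len) {
		m_freem(m);		/* no room: drop the data */
		return ENOBUFS;
	}
	sbappend(sb, m);		/* append to the last record */
	sorwakeup(so);			/* notify sleepers and pollers */
	return 0;
}
#endif	/* notyet */
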
/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	if (m == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer,
		 * it's also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KASSERT(solocked(sb->sb_so));
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m, *m2;
	u_long len, mbcnt;

	KASSERT(solocked(sb->sb_so));

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_nextpkt) {
		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
			len += m2->m_len;
			mbcnt += MSIZE;
			if (m2->m_flags & M_EXT)
				mbcnt += m2->m_ext.ext_size;
			if (m2->m_nextpkt != NULL)
				panic("sbcheck nextpkt");
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	KASSERT(solocked(sb->sb_so));

	if (m0 == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	KASSERT(solocked(sb->sb_so));

	if (m0 == 0)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}
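
/*
 * Illustrative sketch (not part of the original file): a datagram
 * protocol delivering a packet together with its sender's address,
 * following the MT_SONAME record convention described earlier, using
 * sbappendaddr() below.  Names are hypothetical.
 */
#ifdef notyet
static void
example_dgram_deliver(struct socket *so, const struct sockaddr *from,
    struct mbuf *m)
{

	KASSERT(solocked(so));
	if (sbappendaddr(&so->so_rcv, from, m, NULL) == 0)
		m_freem(m);	/* no space or no mbufs: caller frees data */
	else
		sorwakeup(so);
}
#endif	/* notyet */
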
/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present, m0 must include a
 * packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
	struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	KASSERT(solocked(sb->sb_so));

	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claimm(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, void *), asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");
	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}

/*
 * Helper for sbappendaddrchain(): prepend a struct sockaddr* to
 * an mbuf chain.
 */
static inline struct mbuf *
m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
    const struct sockaddr *asa)
{
	struct mbuf *m;
	const int salen = asa->sa_len;

	KASSERT(solocked(sb->sb_so));

	/* only the first in each chain need be a pkthdr */
	MGETHDR(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
#ifdef notyet
	if (salen > MHLEN) {
		MEXTMALLOC(m, salen, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
#else
	KASSERT(salen <= MHLEN);
#endif
	m->m_len = salen;
	memcpy(mtod(m, void *), asa, salen);
	m->m_next = m0;
	m->m_pkthdr.len = salen + m0->m_pkthdr.len;

	return m;
}

int
sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, int sbprio)
{
	int space;
	struct mbuf *m, *n, *n0, *nlast;
	int error;

	KASSERT(solocked(sb->sb_so));

	/*
	 * XXX sbprio reserved for encoding priority of this request:
	 *  SB_PRIO_NONE --> honour normal sb limits
	 *  SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
	 *	take whole chain.  Intended for large requests
	 *	that should be delivered atomically (all, or none).
	 *  SB_PRIO_OVERDRAFT --> allow a small (2*MLEN) overflow
	 *	over normal socket limits, for messages indicating
	 *	buffer overflow in earlier normal/lower-priority messages
	 *  SB_PRIO_BESTEFFORT --> ignore limits entirely.
	 *	Intended for kernel-generated messages only.
	 *	Up to generator to avoid total mbuf resource exhaustion.
	 */
	(void)sbprio;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrchain");

	space = sbspace(sb);

#ifdef notyet
	/*
	 * Enforce SB_PRIO_* limits as described above.
	 */
#endif

	n0 = NULL;
	nlast = NULL;
	for (m = m0; m; m = m->m_nextpkt) {
		struct mbuf *np;

#ifdef MBUFTRACE
		m_claimm(m, sb->sb_mowner);
#endif

		/* Prepend sockaddr to this record (m) of input chain m0 */
		n = m_prepend_sockaddr(sb, m, asa);
		if (n == NULL) {
			error = ENOBUFS;
			goto bad;
		}

		/* Append record (asa+m) to end of new chain n0 */
		if (n0 == NULL) {
			n0 = n;
		} else {
			nlast->m_nextpkt = n;
		}
		/* Keep track of last record on new chain */
		nlast = n;

		for (np = n; np; np = np->m_next)
			sballoc(sb, np);
	}

	SBLASTRECORDCHK(sb, "sbappendaddrchain 1");

	/* Drop the entire chain of (asa+m) records onto the socket */
	SBLINKRECORDCHAIN(sb, n0, nlast);

	SBLASTRECORDCHK(sb, "sbappendaddrchain 2");

	for (m = nlast; m->m_next; m = m->m_next)
		;
	sb->sb_mbtail = m;
	SBLASTMBUFCHK(sb, "sbappendaddrchain");

	return (1);

bad:
	/*
	 * On error, free the prepended addresses.  For consistency
	 * with sbappendaddr(), leave it to our caller to free
	 * the input record chain passed to us as m0.
	 */
	while ((n = n0) != NULL) {
		struct mbuf *np;

		/* Undo the sballoc() of this record */
		for (np = n; np; np = np->m_next)
			sbfree(sb, np);

		n0 = n->m_nextpkt;	/* iterate at next prepended address */
		MFREE(n, np);		/* free prepended address (not data) */
	}
	return 0;
}


int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	KASSERT(solocked(sb->sb_so));

	space = 0;
	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;		/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");
	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}
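
/*
 * Illustrative sketch (not part of the original file): delivering a
 * record that carries ancillary data, per the MT_CONTROL convention
 * described earlier.  Names are hypothetical.
 */
#ifdef notyet
static int
example_deliver_with_control(struct socket *so, struct mbuf *data,
    struct mbuf *control)
{

	KASSERT(solocked(so));
	if (sbappendcontrol(&so->so_rcv, data, control) == 0)
		return ENOBUFS;	/* nothing consumed; caller frees mbufs */
	sorwakeup(so);
	return 0;
}
#endif	/* notyet */
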
/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	KASSERT(solocked(sb->sb_so));

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT(solocked(sb->sb_so));
	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	KASSERT(solocked(sb->sb_so));

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	KASSERT(solocked(sb->sb_so));

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}
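
/*
 * Illustrative sketch (not part of the original file): a reliable
 * protocol discarding acknowledged data from its send buffer with
 * sbdrop(), as described above the append routines.  "acked" is a
 * hypothetical byte count taken from the peer's acknowledgement.
 */
#ifdef notyet
static void
example_ack_received(struct socket *so, int acked)
{

	KASSERT(solocked(so));
	sbdrop(&so->so_snd, acked);	/* drop acked bytes from the front */
	sowwakeup(so);			/* writers may have space again */
}
#endif	/* notyet */
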
/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(void *p, int size, int type, int level)
{
	struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE(size) > MCLBYTES) {
		printf("sbcreatecontrol: message too large %d\n", size);
		return NULL;
	}

	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	if (CMSG_SPACE(size) > MLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
	cp = mtod(m, struct cmsghdr *);
	memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}

void
solockretry(struct socket *so, kmutex_t *lock)
{

	while (lock != so->so_lock) {
		mutex_exit(lock);
		lock = so->so_lock;
		mutex_enter(lock);
	}
}

bool
solocked(struct socket *so)
{

	return mutex_owned(so->so_lock);
}

bool
solocked2(struct socket *so1, struct socket *so2)
{
	kmutex_t *lock;

	lock = so1->so_lock;
	if (lock != so2->so_lock)
		return false;
	return mutex_owned(lock);
}

/*
 * Assign a default lock to a new socket.  Called during PRU_ATTACH by
 * protocols that do not have special locking requirements.
 */
void
sosetlock(struct socket *so)
{
	kmutex_t *lock;

	if (so->so_lock == NULL) {
		lock = softnet_lock;
		so->so_lock = lock;
		mutex_obj_hold(lock);
		mutex_enter(lock);
	}

	/* In all cases, lock must be held on return from PRU_ATTACH. */
	KASSERT(solocked(so));
}

/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	KASSERT(solocked(sb->sb_so));

	for (;;) {
		if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
			sb->sb_flags |= SB_LOCK;
			return 0;
		}
		if (wf != M_WAITOK)
			return EWOULDBLOCK;
		so = sb->sb_so;
		lock = so->so_lock;
		if ((sb->sb_flags & SB_NOINTR) != 0) {
			cv_wait(&so->so_cv, lock);
			error = 0;
		} else
			error = cv_wait_sig(&so->so_cv, lock);
		if (__predict_false(lock != so->so_lock))
			solockretry(so, lock);
		if (error != 0)
			return error;
	}
}

void
sbunlock(struct sockbuf *sb)
{
	struct socket *so;

	so = sb->sb_so;

	KASSERT(solocked(so));
	KASSERT((sb->sb_flags & SB_LOCK) != 0);

	sb->sb_flags &= ~SB_LOCK;
	cv_broadcast(&so->so_cv);
}

int
sowait(struct socket *so, int timo)
{
	kmutex_t *lock;
	int error;

	KASSERT(solocked(so));

	lock = so->so_lock;
	error = cv_timedwait_sig(&so->so_cv, lock, timo);
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
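
/*
 * Illustrative sketch (not part of the original file): the long-term
 * sblock()/sbunlock() pattern used around multi-step buffer operations
 * such as soreceive().  solock()/sounlock() take the short-term
 * so_lock mutex; the wrapper's name is hypothetical.
 */
#ifdef notyet
static int
example_locked_operation(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	int error;

	solock(so);			/* short-term mutex first */
	error = sblock(sb, M_WAITOK);	/* then the long-term buffer lock */
	if (error == 0) {
		/* ... operate on sb while it cannot change underfoot ... */
		sbunlock(sb);
	}
	sounlock(so);
	return error;
}
#endif	/* notyet */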