1 /* $NetBSD: uipc_socket2.c,v 1.50 2003/04/17 13:12:39 fvdl Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * @(#)uipc_socket2.c 8.2 (Berkeley) 2/14/95 36 */ 37 38 #include <sys/cdefs.h> 39 __KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.50 2003/04/17 13:12:39 fvdl Exp $"); 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/proc.h> 44 #include <sys/file.h> 45 #include <sys/buf.h> 46 #include <sys/malloc.h> 47 #include <sys/mbuf.h> 48 #include <sys/protosw.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/signalvar.h> 52 53 /* 54 * Primitive routines for operating on sockets and socket buffers 55 */ 56 57 /* strings for sleep message: */ 58 const char netcon[] = "netcon"; 59 const char netcls[] = "netcls"; 60 const char netio[] = "netio"; 61 const char netlck[] = "netlck"; 62 63 /* 64 * Procedures to manipulate state flags of socket 65 * and do appropriate wakeups. Normal sequence from the 66 * active (originating) side is that soisconnecting() is 67 * called during processing of connect() call, 68 * resulting in an eventual call to soisconnected() if/when the 69 * connection is established. When the connection is torn down 70 * soisdisconnecting() is called during processing of disconnect() call, 71 * and soisdisconnected() is called when the connection to the peer 72 * is totally severed. The semantics of these routines are such that 73 * connectionless protocols can call soisconnected() and soisdisconnected() 74 * only, bypassing the in-progress calls when setting up a ``connection'' 75 * takes no time. 76 * 77 * From the passive side, a socket is created with 78 * two queues of sockets: so_q0 for connections in progress 79 * and so_q for connections already made and awaiting user acceptance. 80 * As a protocol is preparing incoming connections, it creates a socket 81 * structure queued on so_q0 by calling sonewconn(). When the connection 82 * is established, soisconnected() is called, and transfers the 83 * socket structure to so_q, making it available to accept(). 
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

/*
 * Note that a connect() is in progress on the socket: clear any stale
 * connected/disconnecting bits and set SS_ISCONNECTING.  No wakeup is
 * issued here; the eventual soisconnected()/soisdisconnected() does that.
 */
void
soisconnecting(struct socket *so)
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

/*
 * Mark a socket connected and wake up whoever is waiting on it.
 *
 * For a passively created socket (so_head != NULL still queued on the
 * listener's partial queue so_q0), move it to the accept queue so_q and
 * wake the listening socket so accept() can pick it up.  Otherwise wake
 * the socket itself (the active connect()er) on both buffers and on
 * so_timeo, where connect() sleeps.
 */
void
soisconnected(struct socket *so)
{
	struct socket *head;

	head = so->so_head;
	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && soqremque(so, 0)) {
		/* Passive side: hand the socket to accept(). */
		soqinsque(head, so, 1);
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	} else {
		/* Active side: wake the connecting process directly. */
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}

/*
 * Note that a disconnect has begun: no more data may be sent or
 * received.  Wake sleepers on so_timeo and on both socket buffers so
 * they observe the state change.
 */
void
soisdisconnecting(struct socket *so)
{

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * Mark the connection to the peer as fully severed and wake all
 * sleepers, as in soisdisconnecting() above.
 */
void
soisdisconnected(struct socket *so)
{

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 *
 * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
 * to catch calls that are missing the (new) second parameter.
152 */ 153 struct socket * 154 sonewconn1(struct socket *head, int connstatus) 155 { 156 struct socket *so; 157 int soqueue; 158 159 soqueue = connstatus ? 1 : 0; 160 if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) 161 return ((struct socket *)0); 162 so = pool_get(&socket_pool, PR_NOWAIT); 163 if (so == NULL) 164 return (NULL); 165 memset((caddr_t)so, 0, sizeof(*so)); 166 so->so_type = head->so_type; 167 so->so_options = head->so_options &~ SO_ACCEPTCONN; 168 so->so_linger = head->so_linger; 169 so->so_state = head->so_state | SS_NOFDREF; 170 so->so_proto = head->so_proto; 171 so->so_timeo = head->so_timeo; 172 so->so_pgid = head->so_pgid; 173 so->so_send = head->so_send; 174 so->so_receive = head->so_receive; 175 so->so_uid = head->so_uid; 176 #ifdef MBUFTRACE 177 so->so_mowner = head->so_mowner; 178 so->so_rcv.sb_mowner = head->so_rcv.sb_mowner; 179 so->so_snd.sb_mowner = head->so_snd.sb_mowner; 180 #endif 181 (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); 182 soqinsque(head, so, soqueue); 183 if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, 184 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, 185 (struct proc *)0)) { 186 (void) soqremque(so, soqueue); 187 pool_put(&socket_pool, so); 188 return (NULL); 189 } 190 if (connstatus) { 191 sorwakeup(head); 192 wakeup((caddr_t)&head->so_timeo); 193 so->so_state |= connstatus; 194 } 195 return (so); 196 } 197 198 void 199 soqinsque(struct socket *head, struct socket *so, int q) 200 { 201 202 #ifdef DIAGNOSTIC 203 if (so->so_onq != NULL) 204 panic("soqinsque"); 205 #endif 206 207 so->so_head = head; 208 if (q == 0) { 209 head->so_q0len++; 210 so->so_onq = &head->so_q0; 211 } else { 212 head->so_qlen++; 213 so->so_onq = &head->so_q; 214 } 215 TAILQ_INSERT_TAIL(so->so_onq, so, so_qe); 216 } 217 218 int 219 soqremque(struct socket *so, int q) 220 { 221 struct socket *head; 222 223 head = so->so_head; 224 if (q == 0) { 225 if (so->so_onq != &head->so_q0) 226 return (0); 227 
head->so_q0len--; 228 } else { 229 if (so->so_onq != &head->so_q) 230 return (0); 231 head->so_qlen--; 232 } 233 TAILQ_REMOVE(so->so_onq, so, so_qe); 234 so->so_onq = NULL; 235 so->so_head = NULL; 236 return (1); 237 } 238 239 /* 240 * Socantsendmore indicates that no more data will be sent on the 241 * socket; it would normally be applied to a socket when the user 242 * informs the system that no more data is to be sent, by the protocol 243 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 244 * will be received, and will normally be applied to the socket by a 245 * protocol when it detects that the peer will send no more data. 246 * Data queued for reading in the socket may yet be read. 247 */ 248 249 void 250 socantsendmore(struct socket *so) 251 { 252 253 so->so_state |= SS_CANTSENDMORE; 254 sowwakeup(so); 255 } 256 257 void 258 socantrcvmore(struct socket *so) 259 { 260 261 so->so_state |= SS_CANTRCVMORE; 262 sorwakeup(so); 263 } 264 265 /* 266 * Wait for data to arrive at/drain from a socket buffer. 267 */ 268 int 269 sbwait(struct sockbuf *sb) 270 { 271 272 sb->sb_flags |= SB_WAIT; 273 return (tsleep((caddr_t)&sb->sb_cc, 274 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio, 275 sb->sb_timeo)); 276 } 277 278 /* 279 * Lock a sockbuf already known to be locked; 280 * return any error returned from sleep (EINTR). 281 */ 282 int 283 sb_lock(struct sockbuf *sb) 284 { 285 int error; 286 287 while (sb->sb_flags & SB_LOCK) { 288 sb->sb_flags |= SB_WANT; 289 error = tsleep((caddr_t)&sb->sb_flags, 290 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH, 291 netlck, 0); 292 if (error) 293 return (error); 294 } 295 sb->sb_flags |= SB_LOCK; 296 return (0); 297 } 298 299 /* 300 * Wakeup processes waiting on a socket buffer. 301 * Do asynchronous notification via SIGIO 302 * if the socket buffer has the SB_ASYNC flag set. 
 */
void
sowakeup(struct socket *so, struct sockbuf *sb)
{
	struct proc *p;

	/* Notify select()/poll() waiters registered on this buffer. */
	selnotify(&sb->sb_sel, 0);
	sb->sb_flags &= ~SB_SEL;
	if (sb->sb_flags & SB_WAIT) {
		/* Someone is sleeping in sbwait() on sb_cc; wake them. */
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (sb->sb_flags & SB_ASYNC) {
		/*
		 * Asynchronous I/O: a negative so_pgid names a process
		 * group, a positive one a single process.
		 */
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
	/* In-kernel consumer (e.g. an upcall-based user) gets called back. */
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
349 * 350 * Before using a new socket structure it is first necessary to reserve 351 * buffer space to the socket, by calling sbreserve(). This should commit 352 * some of the available buffer space in the system buffer pool for the 353 * socket (currently, it does nothing but enforce limits). The space 354 * should be released by calling sbrelease() when the socket is destroyed. 355 */ 356 357 int 358 soreserve(struct socket *so, u_long sndcc, u_long rcvcc) 359 { 360 361 if (sbreserve(&so->so_snd, sndcc) == 0) 362 goto bad; 363 if (sbreserve(&so->so_rcv, rcvcc) == 0) 364 goto bad2; 365 if (so->so_rcv.sb_lowat == 0) 366 so->so_rcv.sb_lowat = 1; 367 if (so->so_snd.sb_lowat == 0) 368 so->so_snd.sb_lowat = MCLBYTES; 369 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) 370 so->so_snd.sb_lowat = so->so_snd.sb_hiwat; 371 return (0); 372 bad2: 373 sbrelease(&so->so_snd); 374 bad: 375 return (ENOBUFS); 376 } 377 378 /* 379 * Allot mbufs to a sockbuf. 380 * Attempt to scale mbmax so that mbcnt doesn't become limiting 381 * if buffering efficiency is near the normal case. 382 */ 383 int 384 sbreserve(struct sockbuf *sb, u_long cc) 385 { 386 387 if (cc == 0 || 388 (u_quad_t) cc > (u_quad_t) sb_max * MCLBYTES / (MSIZE + MCLBYTES)) 389 return (0); 390 sb->sb_hiwat = cc; 391 sb->sb_mbmax = min(cc * 2, sb_max); 392 if (sb->sb_lowat > sb->sb_hiwat) 393 sb->sb_lowat = sb->sb_hiwat; 394 return (1); 395 } 396 397 /* 398 * Free mbufs held by a socket, and reserved mbuf space. 399 */ 400 void 401 sbrelease(struct sockbuf *sb) 402 { 403 404 sbflush(sb); 405 sb->sb_hiwat = sb->sb_mbmax = 0; 406 } 407 408 /* 409 * Routines to add and remove 410 * data from an mbuf queue. 411 * 412 * The routines sbappend() or sbappendrecord() are normally called to 413 * append new mbufs to a socket buffer, after checking that adequate 414 * space is available, comparing the function sbspace() with the amount 415 * of data to be added. 
sbappendrecord() differs from sbappend() in 416 * that data supplied is treated as the beginning of a new record. 417 * To place a sender's address, optional access rights, and data in a 418 * socket receive buffer, sbappendaddr() should be used. To place 419 * access rights and data in a socket receive buffer, sbappendrights() 420 * should be used. In either case, the new data begins a new record. 421 * Note that unlike sbappend() and sbappendrecord(), these routines check 422 * for the caller that there will be enough space to store the data. 423 * Each fails if there is not enough space, or if it cannot find mbufs 424 * to store additional information in. 425 * 426 * Reliable protocols may use the socket send buffer to hold data 427 * awaiting acknowledgement. Data is normally copied from a socket 428 * send buffer in a protocol with m_copy for output to a peer, 429 * and then removing the data from the socket buffer with sbdrop() 430 * or sbdroprecord() when the data is acknowledged by the peer. 
 */

#ifdef SOCKBUF_DEBUG
/*
 * Debug check: walk the record chain (m_nextpkt links) and verify that
 * sb_lastrecord really points at the final record; on mismatch dump
 * the chain and panic, naming the caller via 'where'.
 */
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

/*
 * Debug check: verify that sb_mbtail points at the last mbuf (m_next
 * links) of the last record (m_nextpkt links); on mismatch dump the
 * whole tree and panic, naming the caller via 'where'.
 */
void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link record m0 onto the tail of sb's record chain and update
 * sb_lastrecord.  Caller remains responsible for sb_mbtail and for
 * the space accounting (sballoc()).
 */
#define	SBLINKRECORD(sb, m0)						\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	if (m == 0)
		return;

#ifdef MBUFTRACE
	m_claim(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		/*
		 * Scan the last record: if any mbuf carries M_EOR the
		 * record is complete, so the new data must start a
		 * fresh record instead of being appended to this one.
		 */
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	/* n is now the last mbuf of the last record (or NULL if empty). */
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	/* Stream buffers hold exactly one record; enforce that. */
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claim(m, sb->sb_mowner);
#endif

	/* Append directly after the cached tail mbuf; no EOR scan needed. */
	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
/*
 * Debug check: recompute byte count and mbuf-space usage for a buffer
 * assumed to hold a single record, and panic if either disagrees with
 * the cached sb_cc/sb_mbcnt totals.
 */
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m;
	u_long len, mbcnt;

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_next) {
		len += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT)
			mbcnt += m->m_ext.ext_size;
		if (m->m_nextpkt)
			panic("sbcheck nextpkt");
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	if (m0 == 0)
		return;

#ifdef MBUFTRACE
	m_claim(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		/* Move the end-of-record mark to the tail of the chain. */
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	/* Compress the remainder of the chain in after m0. */
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	if (m0 == 0)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	/*
	 * Find the insertion point: skip over records that begin with
	 * OOB data (and any control mbufs that precede OOB data within
	 * a record), so new OOB goes after existing OOB but before
	 * everything else.
	 */
	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		/* Move the end-of-record mark to the tail of the chain. */
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
	struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	/* Total space needed: address + data + control. */
	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claim(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	/* Build the MT_SONAME mbuf holding a copy of the address. */
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		/* Address larger than a plain mbuf: attach external storage. */
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, caddr_t), (caddr_t)asa, asa->sa_len);
	/* Chain the record: address -> control (if any) -> data. */
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	/* Account for every mbuf in the new record. */
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");

	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}

/*
 * Append control (ancillary) data and optional data m0 as a new record
 * on sb's receive queue.  Control must be non-NULL (panics otherwise).
 * Returns 0 if the sockbuf lacks space, 1 on success.
 */
int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	space = 0;
	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	/* Account for every mbuf in the new record. */
	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");

	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Discard an empty mbuf unless it carries the only
		 * M_EOR mark and no same-type mbuf follows to take it.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		    (((o = m->m_next) || (o = n)) &&
		    o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/*
		 * Copy small mbufs into trailing space of the previous
		 * mbuf rather than chaining them, to reduce overhead.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Keep the mbuf: link it in and account for it. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		/* Restore the end-of-record mark on the final mbuf. */
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	/* Loop on sb_mbcnt, not sb_cc, so zero-length mbufs are freed too. */
	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 * Crosses record boundaries as needed; panics if asked to drop more
 * data than the buffer holds.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			/* Current record exhausted; advance to the next. */
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial mbuf: trim the front and stop. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* Also free any now-empty mbufs left at the record head. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
920 */ 921 struct mbuf * 922 sbcreatecontrol(caddr_t p, int size, int type, int level) 923 { 924 struct cmsghdr *cp; 925 struct mbuf *m; 926 927 if (CMSG_SPACE(size) > MCLBYTES) { 928 printf("sbcreatecontrol: message too large %d\n", size); 929 return NULL; 930 } 931 932 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) 933 return ((struct mbuf *) NULL); 934 if (CMSG_SPACE(size) > MLEN) { 935 MCLGET(m, M_DONTWAIT); 936 if ((m->m_flags & M_EXT) == 0) { 937 m_free(m); 938 return NULL; 939 } 940 } 941 cp = mtod(m, struct cmsghdr *); 942 memcpy(CMSG_DATA(cp), p, size); 943 m->m_len = CMSG_SPACE(size); 944 cp->cmsg_len = CMSG_LEN(size); 945 cp->cmsg_level = level; 946 cp->cmsg_type = type; 947 return (m); 948 } 949