1 /* $NetBSD: uipc_socket2.c,v 1.54 2003/08/07 16:31:59 agc Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
 *
 *	@(#)uipc_socket2.c	8.2 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.54 2003/08/07 16:31:59 agc Exp $");

#include "opt_mbuftrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>

/*
 * Primitive routines for operating on sockets and socket buffers
 */

/* strings for sleep message (passed as wmesg to tsleep()): */
const char netcon[] = "netcon";
const char netcls[] = "netcls";
const char netio[] = "netio";
const char netlck[] = "netlck";

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

/*
 * Note that a connect() attempt is now in progress; clear any stale
 * connected/disconnecting bits first.
 */
void
soisconnecting(struct socket *so)
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

/*
 * The connection is established.  If this socket sits on the partial
 * (so_q0) queue of an accepting socket, move it to the completed (so_q)
 * queue and wake sleepers in accept() on the head socket; otherwise
 * wake anyone sleeping on this socket itself.
 */
void
soisconnected(struct socket *so)
{
	struct socket *head;

	head = so->so_head;
	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && soqremque(so, 0)) {
		/* Promote from the incomplete to the completed queue. */
		soqinsque(head, so, 1);
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	} else {
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}

/*
 * A disconnect has been requested; no further data may be sent or
 * received.  Wake anyone waiting for state changes or buffer space.
 */
void
soisdisconnecting(struct socket *so)
{

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * The connection to the peer has been completely severed.
 */
void
soisdisconnected(struct socket *so)
{

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED
 * (it is OR'ed into the new socket's so_state below).
 *
 * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
 * to catch calls that are missing the (new) second parameter.
 */
struct socket *
sonewconn1(struct socket *head, int connstatus)
{
	struct socket *so;
	int soqueue;

	/* A non-zero connstatus means the connection is already complete. */
	soqueue = connstatus ? 1 : 0;
	/* Enforce the listen backlog limit (with the traditional 3/2 slop). */
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = pool_get(&socket_pool, PR_NOWAIT);
	if (so == NULL)
		return (NULL);
	memset((caddr_t)so, 0, sizeof(*so));
	/* Inherit type, options, limits and handlers from the listener. */
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uid = head->so_uid;
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
	soqinsque(head, so, soqueue);
	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
	    (struct proc *)0)) {
		/* Protocol attach failed: undo the queue insert and free. */
		(void) soqremque(so, soqueue);
		pool_put(&socket_pool, so);
		return (NULL);
	}
	if (connstatus) {
		/* Already connected: notify accept() sleepers on the head. */
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
		so->so_state |= connstatus;
	}
	return (so);
}

/*
 * Insert socket "so" on connection queue "q" of "head":
 * q == 0 is the incomplete-connection queue (so_q0), anything else
 * the completed queue (so_q).  Queue length counters are kept in
 * step with the TAILQ itself.
 */
void
soqinsque(struct socket *head, struct socket *so, int q)
{

#ifdef DIAGNOSTIC
	if (so->so_onq != NULL)
		panic("soqinsque");
#endif

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

/*
 * Remove "so" from queue "q" (same encoding as soqinsque()) of its
 * accepting socket.  Returns 0, changing nothing, if the socket is
 * not on the requested queue; returns 1 on success.
 */
int
soqremque(struct socket *so, int q)
{
	struct socket *head;

	head = so->so_head;
	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return (0);
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return (0);
		head->so_qlen--;
	}
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return (1);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(struct socket *so)
{

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

void
socantrcvmore(struct socket *so)
{

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 * The sleep is bounded by sb_timeo and is interruptible by
 * signals unless SB_NOINTR is set; returns the tsleep() status.
 */
int
sbwait(struct sockbuf *sb)
{

	sb->sb_flags |= SB_WAIT;
	return (tsleep((caddr_t)&sb->sb_cc,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
	    sb->sb_timeo));
}

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 * Sleeps until SB_LOCK clears, then acquires it for the caller.
 */
int
sb_lock(struct sockbuf *sb)
{
	int error;

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		error = tsleep((caddr_t)&sb->sb_flags,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
		    netlck, 0);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket buffer has the SB_ASYNC flag set.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb)
{
	struct proc *p;

	/* Notify select()/poll() waiters and clear the collision flag. */
	selnotify(&sb->sb_sel, 0);
	sb->sb_flags &= ~SB_SEL;
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (sb->sb_flags & SB_ASYNC) {
		/* so_pgid < 0 names a process group; > 0 a single process. */
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing poll() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field. The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

/*
 * Reserve send and receive buffer space for a socket, and establish
 * sensible low-water marks.  On failure nothing is retained (a
 * successful send reservation is rolled back).  Returns 0 or ENOBUFS.
 */
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
 bad2:
	sbrelease(&so->so_snd);
 bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 * Returns 1 on success, 0 if the request is zero or over the
 * system-wide sb_max derived limit.
 */
int
sbreserve(struct sockbuf *sb, u_long cc)
{

	if (cc == 0 ||
	    (u_quad_t) cc > (u_quad_t) sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
void
sbrelease(struct sockbuf *sb)
{

	sbflush(sb);
	sb->sb_hiwat = sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

#ifdef SOCKBUF_DEBUG
/*
 * Debug check: sb_lastrecord must point at the final record on the
 * sb_mb packet chain.  Dumps the chain and panics if it does not.
 */
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

/*
 * Debug check: sb_mbtail must point at the last mbuf of the last
 * record.  Dumps the whole record/mbuf tree and panics if it does not.
 */
void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link mbuf chain m0 onto sb as a new last record,
 * keeping sb_lastrecord (and an empty sb_mb) up to date.
 */
#define	SBLINKRECORD(sb, m0) \
do { \
	if ((sb)->sb_lastrecord != NULL) \
		(sb)->sb_lastrecord->m_nextpkt = (m0); \
	else \
		(sb)->sb_mb = (m0); \
	(sb)->sb_lastrecord = (m0); \
} while (/*CONSTCOND*/0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	if (m == 0)
		return;

#ifdef MBUFTRACE
	m_claim(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				/* Last record already ended: start a new one. */
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claim(m, sb->sb_mowner);
#endif

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
/*
 * Debug check: walk the buffer and verify the byte count (sb_cc) and
 * mbuf storage accounting (sb_mbcnt) against the actual chain.
 */
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m;
	u_long len, mbcnt;

	len = 0;
	mbcnt = 0;
	for (m = sb->sb_mb; m; m = m->m_next) {
		len += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT)
			mbcnt += m->m_ext.ext_size;
		if (m->m_nextpkt)
			panic("sbcheck nextpkt");
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	if (m0 == 0)
		return;

#ifdef MBUFTRACE
	m_claim(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		/* Move the end-of-record mark to the rest of the chain. */
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	if (m0 == 0)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	/* Find the insertion point: after existing OOB (and control) records. */
	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		/* Move the end-of-record mark to the rest of the chain. */
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
    struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	/* Total space the new record will consume: name + data + control. */
	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claim(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	/* Build an MT_SONAME mbuf holding a copy of the address. */
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		/* Address won't fit in a plain mbuf: attach external storage. */
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, caddr_t), (caddr_t)asa, asa->sa_len);
	/* Chain the record as name -> control -> data. */
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");

	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}

/*
 * Append a record consisting of control data (which must be non-NULL;
 * panics otherwise), optionally followed by data chain m0, to the
 * socket buffer.  Returns 0 if there is insufficient space, 1 on success.
 */
int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	space = 0;
	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");

	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Discard an empty mbuf unless it carries the only
		 * end-of-record mark and has no same-type successor
		 * to take it over.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			/* Coalesce small mbufs into the tail of the buffer. */
			memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Keep this mbuf: link it in and account for its storage. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		/* Reapply the end-of-record mark to the final mbuf kept. */
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			/* Current record exhausted: advance to the next one. */
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial mbuf: trim from the front and stop. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* Free any now-empty mbufs left at the head of the record. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 * Returns NULL if the message is too large or mbuf allocation fails.
 */
struct mbuf *
sbcreatecontrol(caddr_t p, int size, int type, int level)
{
	struct cmsghdr *cp;
	struct mbuf *m;

	/* A single mbuf cluster is the largest control message we build. */
	if (CMSG_SPACE(size) > MCLBYTES) {
		printf("sbcreatecontrol: message too large %d\n", size);
		return NULL;
	}

	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	if (CMSG_SPACE(size) > MLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
	cp = mtod(m, struct cmsghdr *);
	memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}