1 /* $OpenBSD: uipc_socket2.c,v 1.48 2009/03/30 14:29:30 blambert Exp $ */ 2 /* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
 *
 *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/event.h>
#include <sys/pool.h>

/*
 * Primitive routines for operating on sockets and socket buffers
 */

/* Strings for tsleep() wait messages (visible in ps/top while blocked). */
const char netcon[] = "netcon";
const char netcls[] = "netcls";
const char netio[] = "netio";
const char netlck[] = "netlck";

/* Upper bound on any socket buffer high-water mark; patchable at runtime. */
u_long	sb_max = SB_MAX;		/* patchable */

/* Mbuf cluster pool; consulted below to detect cluster shortage. */
extern struct pool mclpools[];

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

/*
 * Note that a connection attempt is starting: clear any stale
 * connected/disconnecting state and mark the socket connecting.
 */
void
soisconnecting(struct socket *so)
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

/*
 * Mark the socket connected.  If it is an embryonic socket still on a
 * listening socket's incomplete queue (so_q0), move it to the completed
 * queue (so_q) and wake the listener so accept() can return it;
 * otherwise wake anything sleeping/selecting on this socket itself.
 */
void
soisconnected(struct socket *so)
{
	struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && soqremque(so, 0)) {
		soqinsque(head, so, 1);
		sorwakeup(head);
		wakeup_one(&head->so_timeo);
	} else {
		wakeup(&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}

/*
 * Begin tearing the connection down: no more data may be sent or
 * received.  Wake sleepers/selectors so they notice the state change.
 */
void
soisdisconnecting(struct socket *so)
{

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup(&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * The connection to the peer is fully severed.
 */
void
soisdisconnected(struct socket *so)
{

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	wakeup(&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
 *
 * Must be called at splsoftnet()
 */
struct socket *
sonewconn(struct socket *head, int connstatus)
{
	struct socket *so;
	int soqueue = connstatus ? 1 : 0;
	extern u_long unpst_sendspace, unpst_recvspace;
	u_long snd_sb_hiwat, rcv_sb_hiwat;

	splsoftassert(IPL_SOFTNET);

	/* Refuse new sockets when mbuf clusters are nearly exhausted. */
	if (mclpools[0].pr_nout > mclpools[0].pr_hardlimit * 95 / 100)
		return ((struct socket *)0);
	/* Enforce the listen(2) backlog (with the traditional 3x slack). */
	if (head->so_qlen + head->so_q0len > head->so_qlimit * 3)
		return ((struct socket *)0);
	so = pool_get(&socket_pool, PR_NOWAIT|PR_ZERO);
	if (so == NULL)
		return ((struct socket *)0);
	/* Inherit type, options, credentials and signal info from head. */
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_euid = head->so_euid;
	so->so_ruid = head->so_ruid;
	so->so_egid = head->so_egid;
	so->so_rgid = head->so_rgid;
	so->so_cpid = head->so_cpid;
	so->so_siguid = head->so_siguid;
	so->so_sigeuid = head->so_sigeuid;

	/*
	 * If we are tight on mbuf clusters, create the new socket
	 * with the minimum.  Sorry, you lose.
	 */
	snd_sb_hiwat = head->so_snd.sb_hiwat;
	if (sbcheckreserve(snd_sb_hiwat, unpst_sendspace))
		snd_sb_hiwat = unpst_sendspace;		/* and udp? */
	rcv_sb_hiwat = head->so_rcv.sb_hiwat;
	if (sbcheckreserve(rcv_sb_hiwat, unpst_recvspace))
		rcv_sb_hiwat = unpst_recvspace;		/* and udp? */

	(void) soreserve(so, snd_sb_hiwat, rcv_sb_hiwat);
	soqinsque(head, so, soqueue);
	/* Give the protocol a chance to attach; undo everything on failure. */
	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL, NULL,
	    curproc)) {
		(void) soqremque(so, soqueue);
		pool_put(&socket_pool, so);
		return ((struct socket *)0);
	}
	if (connstatus) {
		sorwakeup(head);
		wakeup(&head->so_timeo);
		so->so_state |= connstatus;
	}
	return (so);
}

/*
 * Insert socket so onto one of listener head's queues:
 * q == 0 -> incomplete queue (so_q0), else completed queue (so_q).
 */
void
soqinsque(struct socket *head, struct socket *so, int q)
{

#ifdef DIAGNOSTIC
	if (so->so_onq != NULL)
		panic("soqinsque");
#endif

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}

/*
 * Remove socket so from the listener queue selected by q
 * (0 -> so_q0, else so_q).  Returns 1 on success, 0 if the socket
 * was not on that queue (in which case nothing is changed).
 */
int
soqremque(struct socket *so, int q)
{
	struct socket *head;

	head = so->so_head;
	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return (0);
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return (0);
		head->so_qlen--;
	}
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return (1);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

void
socantsendmore(struct socket *so)
{

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

void
socantrcvmore(struct socket *so)
{

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 * Returns the tsleep() result (0, or EINTR/ERESTART unless SB_NOINTR,
 * or EWOULDBLOCK if sb_timeo expires).
 */
int
sbwait(struct sockbuf *sb)
{

	sb->sb_flags |= SB_WAIT;
	return (tsleep(&sb->sb_cc,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
	    sb->sb_timeo));
}

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 * (I.e. sleep while another holder has SB_LOCK, then acquire it.)
 */
int
sb_lock(struct sockbuf *sb)
{
	int error;

	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		error = tsleep(&sb->sb_flags,
		    (sb->sb_flags & SB_NOINTR) ?
		    PSOCK : PSOCK|PCATCH, netlck, 0);
		if (error)
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 * Also notifies select()ers and kqueue knotes.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb)
{
	selwakeup(&sb->sb_sel);
	sb->sb_flags &= ~SB_SEL;
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup(&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC)
		csignal(so->so_pgid, SIGIO, so->so_siguid, so->so_sigeuid);
	KNOTE(&sb->sb_sel.si_note, 0);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field. The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_CONTROL).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

/*
 * Reserve send and receive buffer space for a socket and establish
 * sane low-water marks.  Returns 0 on success or ENOBUFS, in which
 * case any partially made reservation has been released.
 */
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{

	if (sbreserve(&so->so_snd, sndcc))
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc))
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
int
sbreserve(struct sockbuf *sb, u_long cc)
{

	/* Reject zero or anything above the global sb_max cap. */
	if (cc == 0 || cc > sb_max)
		return (1);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * 2, sb_max + (sb_max / MCLBYTES) * MSIZE);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (0);
}

/*
 * If over 50% of mbuf clusters in use, do not accept any
 * greater than normal request.
 * Returns ENOBUFS if cnt exceeds defcnt during such a shortage, else 0.
 */
int
sbcheckreserve(u_long cnt, u_long defcnt)
{
	if (cnt > defcnt &&
	    mclpools[0].pr_nout > mclpools[0].pr_hardlimit / 2)
		return (ENOBUFS);
	return (0);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
void
sbrelease(struct sockbuf *sb)
{

	sbflush(sb);
	sb->sb_hiwat = sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.
 * Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

#ifdef SOCKBUF_DEBUG
/*
 * Consistency check: verify that sb_lastrecord really points at the
 * last record on the sb_mb chain; panic (with a dump) if not.
 */
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}

/*
 * Consistency check: verify that sb_mbtail really points at the last
 * mbuf of the last record; panic (with a dump) if not.
 */
void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	/* Walk to the last record ... */
	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	/* ... then to the last mbuf of that record. */
	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link record m0 onto the end of sb's record chain and update
 * sb_lastrecord.  Does not touch sb_mbtail or the byte accounting.
 */
#define	SBLINKRECORD(sb, m0)						\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		do {
			if (n->m_flags & M_EOR) {
				/* Last record ended; start a new one. */
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}

/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}

#ifdef SOCKBUF_DEBUG
/*
 * Consistency check: recompute byte and mbuf counts for sb and panic
 * if they disagree with the cached sb_cc/sb_mbcnt accounting.
 */
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m;
	u_long len = 0, mbcnt = 0;

	for (m = sb->sb_mb; m; m = m->m_next) {
		len += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT)
			mbcnt += m->m_ext.ext_size;
		if (m->m_nextpkt)
			panic("sbcheck nextpkt");
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	if (m0 == NULL)
		return;

	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = NULL;
	/* If m0 carried M_EOR, move it onto the rest of the chain. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	if (m0 == NULL)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	/* Find the insertion point: after existing OOB records. */
	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = NULL;
	/* If m0 carried M_EOR, move it onto the rest of the chain. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
    struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");
	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == NULL)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	/* The address must fit in a single (non-cluster) mbuf. */
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == NULL)
		return (0);
	m->m_len = asa->sa_len;
	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	/* Account for every mbuf in the new record (name+control+data). */
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");

	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}

/*
 * Append control (ancillary) data and optional data m0 as a new record
 * on sb.  Returns 0 if there is insufficient space, 1 on success.
 */
int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space = 0;

	if (control == NULL)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == NULL)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	/* Account for every mbuf in the new record. */
	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");

	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor = 0;
	struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Discard empty mbufs, but be careful not to lose an
		 * M_EOR marker: only drop an empty mbuf if the marker
		 * can be carried by a following mbuf of the same type.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		    (((o = m->m_next) || (o = n)) &&
		    o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/*
		 * Coalesce small mbufs into the tail of the previous one
		 * when the types match and there is room.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			if (m->m_type != MT_CONTROL && m->m_type != MT_SONAME)
				sb->sb_datacc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Keep the mbuf: link it in and account for it. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = NULL;
	}
	/* Re-apply any M_EOR seen above to the final retained mbuf. */
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress");
	}
	SBLASTMBUFCHK(sb, __func__);
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
void
sbflush(struct sockbuf *sb)
{

	KASSERT((sb->sb_flags & SB_LOCK) == 0);

	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);

	KASSERT(sb->sb_cc == 0);
	KASSERT(sb->sb_datacc == 0);
	KASSERT(sb->sb_mb == NULL);
	KASSERT(sb->sb_mbtail == NULL);
	KASSERT(sb->sb_lastrecord == NULL);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn;
	struct mbuf *next;

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == NULL) {
			/* Current record exhausted; move to the next one. */
			if (next == NULL)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial drop from this mbuf: trim its front. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			if (m->m_type != MT_CONTROL && m->m_type != MT_SONAME)
				sb->sb_datacc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* Also drop any now-empty mbufs at the new front. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
void
sbdroprecord(struct sockbuf *sb)
{
	struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while ((m = mn) != NULL);
	}
	SB_EMPTY_FIXUP(sb);
}

/*
 * Create a "control" mbuf containing the specified data
 * with the specified type for presentation on a socket buffer.
 * Returns the new mbuf, or NULL if the message is too large or no
 * mbuf (or cluster, when needed) could be allocated.
 */
struct mbuf *
sbcreatecontrol(caddr_t p, int size, int type, int level)
{
	struct cmsghdr *cp;
	struct mbuf *m;

	/* CMSG_SPACE() includes the cmsghdr and alignment padding. */
	if (CMSG_SPACE(size) > MCLBYTES) {
		printf("sbcreatecontrol: message too large %d\n", size);
		return NULL;
	}

	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
		return ((struct mbuf *) NULL);
	/* Grab a cluster when the payload won't fit in a plain mbuf. */
	if (CMSG_SPACE(size) > MLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return NULL;
		}
	}
	cp = mtod(m, struct cmsghdr *);
	bcopy(p, CMSG_DATA(cp), size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}