1 /* $OpenBSD: uipc_socket2.c,v 1.72 2017/02/14 09:46:21 mpi Exp $ */ 2 /* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/file.h> 38 #include <sys/malloc.h> 39 #include <sys/mbuf.h> 40 #include <sys/protosw.h> 41 #include <sys/domain.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/signalvar.h> 45 #include <sys/event.h> 46 #include <sys/pool.h> 47 48 /* 49 * Primitive routines for operating on sockets and socket buffers 50 */ 51 52 u_long sb_max = SB_MAX; /* patchable */ 53 54 extern struct pool mclpools[]; 55 extern struct pool mbpool; 56 57 int sbsleep(struct sockbuf *, struct rwlock *); 58 59 /* 60 * Procedures to manipulate state flags of socket 61 * and do appropriate wakeups. Normal sequence from the 62 * active (originating) side is that soisconnecting() is 63 * called during processing of connect() call, 64 * resulting in an eventual call to soisconnected() if/when the 65 * connection is established. When the connection is torn down 66 * soisdisconnecting() is called during processing of disconnect() call, 67 * and soisdisconnected() is called when the connection to the peer 68 * is totally severed. The semantics of these routines are such that 69 * connectionless protocols can call soisconnected() and soisdisconnected() 70 * only, bypassing the in-progress calls when setting up a ``connection'' 71 * takes no time. 72 * 73 * From the passive side, a socket is created with 74 * two queues of sockets: so_q0 for connections in progress 75 * and so_q for connections already made and awaiting user acceptance. 76 * As a protocol is preparing incoming connections, it creates a socket 77 * structure queued on so_q0 by calling sonewconn(). When the connection 78 * is established, soisconnected() is called, and transfers the 79 * socket structure to so_q, making it available to accept(). 80 * 81 * If a socket is closed with sockets on either 82 * so_q0 or so_q, these sockets are dropped. 
83 * 84 * If higher level protocols are implemented in 85 * the kernel, the wakeups done here will sometimes 86 * cause software-interrupt process scheduling. 87 */ 88 89 void 90 soisconnecting(struct socket *so) 91 { 92 93 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 94 so->so_state |= SS_ISCONNECTING; 95 } 96 97 void 98 soisconnected(struct socket *so) 99 { 100 struct socket *head = so->so_head; 101 102 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 103 so->so_state |= SS_ISCONNECTED; 104 if (head && soqremque(so, 0)) { 105 soqinsque(head, so, 1); 106 sorwakeup(head); 107 wakeup_one(&head->so_timeo); 108 } else { 109 wakeup(&so->so_timeo); 110 sorwakeup(so); 111 sowwakeup(so); 112 } 113 } 114 115 void 116 soisdisconnecting(struct socket *so) 117 { 118 119 so->so_state &= ~SS_ISCONNECTING; 120 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); 121 wakeup(&so->so_timeo); 122 sowwakeup(so); 123 sorwakeup(so); 124 } 125 126 void 127 soisdisconnected(struct socket *so) 128 { 129 130 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 131 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED); 132 wakeup(&so->so_timeo); 133 sowwakeup(so); 134 sorwakeup(so); 135 } 136 137 /* 138 * When an attempt at a new connection is noted on a socket 139 * which accepts connections, sonewconn is called. If the 140 * connection is possible (subject to space constraints, etc.) 141 * then we allocate a new structure, properly linked into the 142 * data structure of the original socket, and return this. 143 * Connstatus may be 0 or SS_ISCONNECTED. 144 */ 145 struct socket * 146 sonewconn(struct socket *head, int connstatus) 147 { 148 struct socket *so; 149 int soqueue = connstatus ? 
1 : 0; 150 151 soassertlocked(head); 152 153 if (mclpools[0].pr_nout > mclpools[0].pr_hardlimit * 95 / 100) 154 return (NULL); 155 if (head->so_qlen + head->so_q0len > head->so_qlimit * 3) 156 return (NULL); 157 so = pool_get(&socket_pool, PR_NOWAIT|PR_ZERO); 158 if (so == NULL) 159 return (NULL); 160 so->so_type = head->so_type; 161 so->so_options = head->so_options &~ SO_ACCEPTCONN; 162 so->so_linger = head->so_linger; 163 so->so_state = head->so_state | SS_NOFDREF; 164 so->so_proto = head->so_proto; 165 so->so_timeo = head->so_timeo; 166 so->so_pgid = head->so_pgid; 167 so->so_euid = head->so_euid; 168 so->so_ruid = head->so_ruid; 169 so->so_egid = head->so_egid; 170 so->so_rgid = head->so_rgid; 171 so->so_cpid = head->so_cpid; 172 so->so_siguid = head->so_siguid; 173 so->so_sigeuid = head->so_sigeuid; 174 175 /* 176 * Inherit watermarks but those may get clamped in low mem situations. 177 */ 178 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) { 179 pool_put(&socket_pool, so); 180 return (NULL); 181 } 182 so->so_snd.sb_wat = head->so_snd.sb_wat; 183 so->so_snd.sb_lowat = head->so_snd.sb_lowat; 184 so->so_snd.sb_timeo = head->so_snd.sb_timeo; 185 so->so_rcv.sb_wat = head->so_rcv.sb_wat; 186 so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; 187 so->so_rcv.sb_timeo = head->so_rcv.sb_timeo; 188 189 soqinsque(head, so, soqueue); 190 if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL, NULL, 191 curproc)) { 192 (void) soqremque(so, soqueue); 193 pool_put(&socket_pool, so); 194 return (NULL); 195 } 196 if (connstatus) { 197 sorwakeup(head); 198 wakeup(&head->so_timeo); 199 so->so_state |= connstatus; 200 } 201 return (so); 202 } 203 204 void 205 soqinsque(struct socket *head, struct socket *so, int q) 206 { 207 208 #ifdef DIAGNOSTIC 209 if (so->so_onq != NULL) 210 panic("soqinsque"); 211 #endif 212 213 so->so_head = head; 214 if (q == 0) { 215 head->so_q0len++; 216 so->so_onq = &head->so_q0; 217 } else { 218 head->so_qlen++; 219 so->so_onq = 
&head->so_q; 220 } 221 TAILQ_INSERT_TAIL(so->so_onq, so, so_qe); 222 } 223 224 int 225 soqremque(struct socket *so, int q) 226 { 227 struct socket *head; 228 229 head = so->so_head; 230 if (q == 0) { 231 if (so->so_onq != &head->so_q0) 232 return (0); 233 head->so_q0len--; 234 } else { 235 if (so->so_onq != &head->so_q) 236 return (0); 237 head->so_qlen--; 238 } 239 TAILQ_REMOVE(so->so_onq, so, so_qe); 240 so->so_onq = NULL; 241 so->so_head = NULL; 242 return (1); 243 } 244 245 /* 246 * Socantsendmore indicates that no more data will be sent on the 247 * socket; it would normally be applied to a socket when the user 248 * informs the system that no more data is to be sent, by the protocol 249 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 250 * will be received, and will normally be applied to the socket by a 251 * protocol when it detects that the peer will send no more data. 252 * Data queued for reading in the socket may yet be read. 253 */ 254 255 void 256 socantsendmore(struct socket *so) 257 { 258 259 so->so_state |= SS_CANTSENDMORE; 260 sowwakeup(so); 261 } 262 263 void 264 socantrcvmore(struct socket *so) 265 { 266 267 so->so_state |= SS_CANTRCVMORE; 268 sorwakeup(so); 269 } 270 271 int 272 solock(struct socket *so) 273 { 274 int s; 275 276 if (so->so_proto->pr_domain->dom_family != PF_LOCAL) 277 NET_LOCK(s); 278 else 279 s = -42; 280 281 return (s); 282 } 283 284 void 285 sounlock(int s) 286 { 287 if (s != -42) 288 NET_UNLOCK(s); 289 } 290 291 void 292 soassertlocked(struct socket *so) 293 { 294 if (so->so_proto->pr_domain->dom_family != PF_LOCAL) 295 NET_ASSERT_LOCKED(); 296 } 297 298 int 299 sosleep(struct socket *so, void *ident, int prio, const char *wmesg, int timo) 300 { 301 if (so->so_proto->pr_domain->dom_family != PF_LOCAL) 302 return rwsleep(ident, &netlock, prio, wmesg, timo); 303 else 304 return tsleep(ident, prio, wmesg, timo); 305 } 306 307 /* 308 * Wait for data to arrive at/drain from a socket buffer. 
309 */ 310 int 311 sbwait(struct socket *so, struct sockbuf *sb) 312 { 313 soassertlocked(so); 314 315 sb->sb_flagsintr |= SB_WAIT; 316 return (sosleep(so, &sb->sb_cc, 317 (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "netio", 318 sb->sb_timeo)); 319 } 320 321 int 322 sbsleep(struct sockbuf *sb, struct rwlock *lock) 323 { 324 int error, prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH; 325 326 if (lock != NULL) 327 error = rwsleep(&sb->sb_flags, lock, prio, "netlck", 0); 328 else 329 error = tsleep(&sb->sb_flags, prio, "netlck", 0); 330 331 return (error); 332 } 333 334 int 335 sblock(struct sockbuf *sb, int wait, struct rwlock *lock) 336 { 337 int error; 338 339 KERNEL_ASSERT_LOCKED(); 340 341 if ((sb->sb_flags & SB_LOCK) == 0) { 342 sb->sb_flags |= SB_LOCK; 343 return (0); 344 } 345 if (wait & M_NOWAIT) 346 return (EWOULDBLOCK); 347 348 while (sb->sb_flags & SB_LOCK) { 349 sb->sb_flags |= SB_WANT; 350 error = sbsleep(sb, lock); 351 if (error) 352 return (error); 353 } 354 sb->sb_flags |= SB_LOCK; 355 return (0); 356 } 357 358 void 359 sbunlock(struct sockbuf *sb) 360 { 361 KERNEL_ASSERT_LOCKED(); 362 363 sb->sb_flags &= ~SB_LOCK; 364 if (sb->sb_flags & SB_WANT) { 365 sb->sb_flags &= ~SB_WANT; 366 wakeup(&sb->sb_flags); 367 } 368 } 369 370 /* 371 * Wakeup processes waiting on a socket buffer. 372 * Do asynchronous notification via SIGIO 373 * if the socket has the SS_ASYNC flag set. 374 */ 375 void 376 sowakeup(struct socket *so, struct sockbuf *sb) 377 { 378 soassertlocked(so); 379 380 selwakeup(&sb->sb_sel); 381 sb->sb_flagsintr &= ~SB_SEL; 382 if (sb->sb_flagsintr & SB_WAIT) { 383 sb->sb_flagsintr &= ~SB_WAIT; 384 wakeup(&sb->sb_cc); 385 } 386 if (so->so_state & SS_ASYNC) 387 csignal(so->so_pgid, SIGIO, so->so_siguid, so->so_sigeuid); 388 } 389 390 /* 391 * Socket buffer (struct sockbuf) utility routines. 392 * 393 * Each socket contains two socket buffers: one for sending data and 394 * one for receiving data. 
Each buffer contains a queue of mbufs, 395 * information about the number of mbufs and amount of data in the 396 * queue, and other fields allowing select() statements and notification 397 * on data availability to be implemented. 398 * 399 * Data stored in a socket buffer is maintained as a list of records. 400 * Each record is a list of mbufs chained together with the m_next 401 * field. Records are chained together with the m_nextpkt field. The upper 402 * level routine soreceive() expects the following conventions to be 403 * observed when placing information in the receive buffer: 404 * 405 * 1. If the protocol requires each message be preceded by the sender's 406 * name, then a record containing that name must be present before 407 * any associated data (mbuf's must be of type MT_SONAME). 408 * 2. If the protocol supports the exchange of ``access rights'' (really 409 * just additional data associated with the message), and there are 410 * ``rights'' to be received, then a record containing this data 411 * should be present (mbuf's must be of type MT_CONTROL). 412 * 3. If a name or rights record exists, then it must be followed by 413 * a data record, perhaps of zero length. 414 * 415 * Before using a new socket structure it is first necessary to reserve 416 * buffer space to the socket, by calling sbreserve(). This should commit 417 * some of the available buffer space in the system buffer pool for the 418 * socket (currently, it does nothing but enforce limits). The space 419 * should be released by calling sbrelease() when the socket is destroyed. 
420 */ 421 422 int 423 soreserve(struct socket *so, u_long sndcc, u_long rcvcc) 424 { 425 426 if (sbreserve(&so->so_snd, sndcc)) 427 goto bad; 428 if (sbreserve(&so->so_rcv, rcvcc)) 429 goto bad2; 430 so->so_snd.sb_wat = sndcc; 431 so->so_rcv.sb_wat = rcvcc; 432 if (so->so_rcv.sb_lowat == 0) 433 so->so_rcv.sb_lowat = 1; 434 if (so->so_snd.sb_lowat == 0) 435 so->so_snd.sb_lowat = MCLBYTES; 436 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) 437 so->so_snd.sb_lowat = so->so_snd.sb_hiwat; 438 return (0); 439 bad2: 440 sbrelease(&so->so_snd); 441 bad: 442 return (ENOBUFS); 443 } 444 445 /* 446 * Allot mbufs to a sockbuf. 447 * Attempt to scale mbmax so that mbcnt doesn't become limiting 448 * if buffering efficiency is near the normal case. 449 */ 450 int 451 sbreserve(struct sockbuf *sb, u_long cc) 452 { 453 454 if (cc == 0 || cc > sb_max) 455 return (1); 456 sb->sb_hiwat = cc; 457 sb->sb_mbmax = max(3 * MAXMCLBYTES, 458 min(cc * 2, sb_max + (sb_max / MCLBYTES) * MSIZE)); 459 if (sb->sb_lowat > sb->sb_hiwat) 460 sb->sb_lowat = sb->sb_hiwat; 461 return (0); 462 } 463 464 /* 465 * In low memory situation, do not accept any greater than normal request. 466 */ 467 int 468 sbcheckreserve(u_long cnt, u_long defcnt) 469 { 470 if (cnt > defcnt && sbchecklowmem()) 471 return (ENOBUFS); 472 return (0); 473 } 474 475 int 476 sbchecklowmem(void) 477 { 478 static int sblowmem; 479 480 if (mclpools[0].pr_nout < mclpools[0].pr_hardlimit * 60 / 100 || 481 mbpool.pr_nout < mbpool.pr_hardlimit * 60 / 100) 482 sblowmem = 0; 483 if (mclpools[0].pr_nout > mclpools[0].pr_hardlimit * 80 / 100 || 484 mbpool.pr_nout > mbpool.pr_hardlimit * 80 / 100) 485 sblowmem = 1; 486 return (sblowmem); 487 } 488 489 /* 490 * Free mbufs held by a socket, and reserved mbuf space. 491 */ 492 void 493 sbrelease(struct sockbuf *sb) 494 { 495 496 sbflush(sb); 497 sb->sb_hiwat = sb->sb_mbmax = 0; 498 } 499 500 /* 501 * Routines to add and remove 502 * data from an mbuf queue. 
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available by comparing the result of sbspace() with the
 * amount of data to be added.  sbappendrecord() differs from sbappend()
 * in that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendcontrol()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copym for output to a peer,
 * and is then removed from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
523 */ 524 525 #ifdef SOCKBUF_DEBUG 526 void 527 sblastrecordchk(struct sockbuf *sb, const char *where) 528 { 529 struct mbuf *m = sb->sb_mb; 530 531 while (m && m->m_nextpkt) 532 m = m->m_nextpkt; 533 534 if (m != sb->sb_lastrecord) { 535 printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n", 536 sb->sb_mb, sb->sb_lastrecord, m); 537 printf("packet chain:\n"); 538 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) 539 printf("\t%p\n", m); 540 panic("sblastrecordchk from %s", where); 541 } 542 } 543 544 void 545 sblastmbufchk(struct sockbuf *sb, const char *where) 546 { 547 struct mbuf *m = sb->sb_mb; 548 struct mbuf *n; 549 550 while (m && m->m_nextpkt) 551 m = m->m_nextpkt; 552 553 while (m && m->m_next) 554 m = m->m_next; 555 556 if (m != sb->sb_mbtail) { 557 printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n", 558 sb->sb_mb, sb->sb_mbtail, m); 559 printf("packet tree:\n"); 560 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { 561 printf("\t"); 562 for (n = m; n != NULL; n = n->m_next) 563 printf("%p ", n); 564 printf("\n"); 565 } 566 panic("sblastmbufchk from %s", where); 567 } 568 } 569 #endif /* SOCKBUF_DEBUG */ 570 571 #define SBLINKRECORD(sb, m0) \ 572 do { \ 573 if ((sb)->sb_lastrecord != NULL) \ 574 (sb)->sb_lastrecord->m_nextpkt = (m0); \ 575 else \ 576 (sb)->sb_mb = (m0); \ 577 (sb)->sb_lastrecord = (m0); \ 578 } while (/*CONSTCOND*/0) 579 580 /* 581 * Append mbuf chain m to the last record in the 582 * socket buffer sb. The additional space associated 583 * the mbuf chain is recorded in sb. Empty mbufs are 584 * discarded and mbufs are compacted where possible. 585 */ 586 void 587 sbappend(struct sockbuf *sb, struct mbuf *m) 588 { 589 struct mbuf *n; 590 591 if (m == NULL) 592 return; 593 594 SBLASTRECORDCHK(sb, "sbappend 1"); 595 596 if ((n = sb->sb_lastrecord) != NULL) { 597 /* 598 * XXX Would like to simply use sb_mbtail here, but 599 * XXX I need to verify that I won't miss an EOR that 600 * XXX way. 
601 */ 602 do { 603 if (n->m_flags & M_EOR) { 604 sbappendrecord(sb, m); /* XXXXXX!!!! */ 605 return; 606 } 607 } while (n->m_next && (n = n->m_next)); 608 } else { 609 /* 610 * If this is the first record in the socket buffer, it's 611 * also the last record. 612 */ 613 sb->sb_lastrecord = m; 614 } 615 sbcompress(sb, m, n); 616 SBLASTRECORDCHK(sb, "sbappend 2"); 617 } 618 619 /* 620 * This version of sbappend() should only be used when the caller 621 * absolutely knows that there will never be more than one record 622 * in the socket buffer, that is, a stream protocol (such as TCP). 623 */ 624 void 625 sbappendstream(struct sockbuf *sb, struct mbuf *m) 626 { 627 628 KDASSERT(m->m_nextpkt == NULL); 629 KASSERT(sb->sb_mb == sb->sb_lastrecord); 630 631 SBLASTMBUFCHK(sb, __func__); 632 633 sbcompress(sb, m, sb->sb_mbtail); 634 635 sb->sb_lastrecord = sb->sb_mb; 636 SBLASTRECORDCHK(sb, __func__); 637 } 638 639 #ifdef SOCKBUF_DEBUG 640 void 641 sbcheck(struct sockbuf *sb) 642 { 643 struct mbuf *m, *n; 644 u_long len = 0, mbcnt = 0; 645 646 for (m = sb->sb_mb; m; m = m->m_nextpkt) { 647 for (n = m; n; n = n->m_next) { 648 len += n->m_len; 649 mbcnt += MSIZE; 650 if (n->m_flags & M_EXT) 651 mbcnt += n->m_ext.ext_size; 652 if (m != n && n->m_nextpkt) 653 panic("sbcheck nextpkt"); 654 } 655 } 656 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { 657 printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc, 658 mbcnt, sb->sb_mbcnt); 659 panic("sbcheck"); 660 } 661 } 662 #endif 663 664 /* 665 * As above, except the mbuf chain 666 * begins a new record. 667 */ 668 void 669 sbappendrecord(struct sockbuf *sb, struct mbuf *m0) 670 { 671 struct mbuf *m; 672 673 if (m0 == NULL) 674 return; 675 676 /* 677 * Put the first mbuf on the queue. 678 * Note this permits zero length records. 
679 */ 680 sballoc(sb, m0); 681 SBLASTRECORDCHK(sb, "sbappendrecord 1"); 682 SBLINKRECORD(sb, m0); 683 m = m0->m_next; 684 m0->m_next = NULL; 685 if (m && (m0->m_flags & M_EOR)) { 686 m0->m_flags &= ~M_EOR; 687 m->m_flags |= M_EOR; 688 } 689 sbcompress(sb, m, m0); 690 SBLASTRECORDCHK(sb, "sbappendrecord 2"); 691 } 692 693 /* 694 * As above except that OOB data 695 * is inserted at the beginning of the sockbuf, 696 * but after any other OOB data. 697 */ 698 void 699 sbinsertoob(struct sockbuf *sb, struct mbuf *m0) 700 { 701 struct mbuf *m, **mp; 702 703 if (m0 == NULL) 704 return; 705 706 SBLASTRECORDCHK(sb, "sbinsertoob 1"); 707 708 for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) { 709 again: 710 switch (m->m_type) { 711 712 case MT_OOBDATA: 713 continue; /* WANT next train */ 714 715 case MT_CONTROL: 716 if ((m = m->m_next) != NULL) 717 goto again; /* inspect THIS train further */ 718 } 719 break; 720 } 721 /* 722 * Put the first mbuf on the queue. 723 * Note this permits zero length records. 724 */ 725 sballoc(sb, m0); 726 m0->m_nextpkt = *mp; 727 if (*mp == NULL) { 728 /* m0 is actually the new tail */ 729 sb->sb_lastrecord = m0; 730 } 731 *mp = m0; 732 m = m0->m_next; 733 m0->m_next = NULL; 734 if (m && (m0->m_flags & M_EOR)) { 735 m0->m_flags &= ~M_EOR; 736 m->m_flags |= M_EOR; 737 } 738 sbcompress(sb, m, m0); 739 SBLASTRECORDCHK(sb, "sbinsertoob 2"); 740 } 741 742 /* 743 * Append address and data, and optionally, control (ancillary) data 744 * to the receive queue of a socket. If present, 745 * m0 must include a packet header with total length. 746 * Returns 0 if no space in sockbuf or insufficient mbufs. 
747 */ 748 int 749 sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, 750 struct mbuf *control) 751 { 752 struct mbuf *m, *n, *nlast; 753 int space = asa->sa_len; 754 755 if (m0 && (m0->m_flags & M_PKTHDR) == 0) 756 panic("sbappendaddr"); 757 if (m0) 758 space += m0->m_pkthdr.len; 759 for (n = control; n; n = n->m_next) { 760 space += n->m_len; 761 if (n->m_next == NULL) /* keep pointer to last control buf */ 762 break; 763 } 764 if (space > sbspace(sb)) 765 return (0); 766 if (asa->sa_len > MLEN) 767 return (0); 768 MGET(m, M_DONTWAIT, MT_SONAME); 769 if (m == NULL) 770 return (0); 771 m->m_len = asa->sa_len; 772 memcpy(mtod(m, caddr_t), asa, asa->sa_len); 773 if (n) 774 n->m_next = m0; /* concatenate data to control */ 775 else 776 control = m0; 777 m->m_next = control; 778 779 SBLASTRECORDCHK(sb, "sbappendaddr 1"); 780 781 for (n = m; n->m_next != NULL; n = n->m_next) 782 sballoc(sb, n); 783 sballoc(sb, n); 784 nlast = n; 785 SBLINKRECORD(sb, m); 786 787 sb->sb_mbtail = nlast; 788 SBLASTMBUFCHK(sb, "sbappendaddr"); 789 790 SBLASTRECORDCHK(sb, "sbappendaddr 2"); 791 792 return (1); 793 } 794 795 int 796 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) 797 { 798 struct mbuf *m, *mlast, *n; 799 int space = 0; 800 801 if (control == NULL) 802 panic("sbappendcontrol"); 803 for (m = control; ; m = m->m_next) { 804 space += m->m_len; 805 if (m->m_next == NULL) 806 break; 807 } 808 n = m; /* save pointer to last control buffer */ 809 for (m = m0; m; m = m->m_next) 810 space += m->m_len; 811 if (space > sbspace(sb)) 812 return (0); 813 n->m_next = m0; /* concatenate data to control */ 814 815 SBLASTRECORDCHK(sb, "sbappendcontrol 1"); 816 817 for (m = control; m->m_next != NULL; m = m->m_next) 818 sballoc(sb, m); 819 sballoc(sb, m); 820 mlast = m; 821 SBLINKRECORD(sb, control); 822 823 sb->sb_mbtail = mlast; 824 SBLASTMBUFCHK(sb, "sbappendcontrol"); 825 826 SBLASTRECORDCHK(sb, "sbappendcontrol 2"); 827 828 return (1); 829 } 
830 831 /* 832 * Compress mbuf chain m into the socket 833 * buffer sb following mbuf n. If n 834 * is null, the buffer is presumed empty. 835 */ 836 void 837 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) 838 { 839 int eor = 0; 840 struct mbuf *o; 841 842 while (m) { 843 eor |= m->m_flags & M_EOR; 844 if (m->m_len == 0 && 845 (eor == 0 || 846 (((o = m->m_next) || (o = n)) && 847 o->m_type == m->m_type))) { 848 if (sb->sb_lastrecord == m) 849 sb->sb_lastrecord = m->m_next; 850 m = m_free(m); 851 continue; 852 } 853 if (n && (n->m_flags & M_EOR) == 0 && 854 /* M_TRAILINGSPACE() checks buffer writeability */ 855 m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */ 856 m->m_len <= M_TRAILINGSPACE(n) && 857 n->m_type == m->m_type) { 858 memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t), 859 m->m_len); 860 n->m_len += m->m_len; 861 sb->sb_cc += m->m_len; 862 if (m->m_type != MT_CONTROL && m->m_type != MT_SONAME) 863 sb->sb_datacc += m->m_len; 864 m = m_free(m); 865 continue; 866 } 867 if (n) 868 n->m_next = m; 869 else 870 sb->sb_mb = m; 871 sb->sb_mbtail = m; 872 sballoc(sb, m); 873 n = m; 874 m->m_flags &= ~M_EOR; 875 m = m->m_next; 876 n->m_next = NULL; 877 } 878 if (eor) { 879 if (n) 880 n->m_flags |= eor; 881 else 882 printf("semi-panic: sbcompress"); 883 } 884 SBLASTMBUFCHK(sb, __func__); 885 } 886 887 /* 888 * Free all mbufs in a sockbuf. 889 * Check that all resources are reclaimed. 890 */ 891 void 892 sbflush(struct sockbuf *sb) 893 { 894 895 KASSERT((sb->sb_flags & SB_LOCK) == 0); 896 897 while (sb->sb_mbcnt) 898 sbdrop(sb, (int)sb->sb_cc); 899 900 KASSERT(sb->sb_cc == 0); 901 KASSERT(sb->sb_datacc == 0); 902 KASSERT(sb->sb_mb == NULL); 903 KASSERT(sb->sb_mbtail == NULL); 904 KASSERT(sb->sb_lastrecord == NULL); 905 } 906 907 /* 908 * Drop data from (the front of) a sockbuf. 909 */ 910 void 911 sbdrop(struct sockbuf *sb, int len) 912 { 913 struct mbuf *m, *mn; 914 struct mbuf *next; 915 916 next = (m = sb->sb_mb) ? 
m->m_nextpkt : 0; 917 while (len > 0) { 918 if (m == NULL) { 919 if (next == NULL) 920 panic("sbdrop"); 921 m = next; 922 next = m->m_nextpkt; 923 continue; 924 } 925 if (m->m_len > len) { 926 m->m_len -= len; 927 m->m_data += len; 928 sb->sb_cc -= len; 929 if (m->m_type != MT_CONTROL && m->m_type != MT_SONAME) 930 sb->sb_datacc -= len; 931 break; 932 } 933 len -= m->m_len; 934 sbfree(sb, m); 935 mn = m_free(m); 936 m = mn; 937 } 938 while (m && m->m_len == 0) { 939 sbfree(sb, m); 940 mn = m_free(m); 941 m = mn; 942 } 943 if (m) { 944 sb->sb_mb = m; 945 m->m_nextpkt = next; 946 } else 947 sb->sb_mb = next; 948 /* 949 * First part is an inline SB_EMPTY_FIXUP(). Second part 950 * makes sure sb_lastrecord is up-to-date if we dropped 951 * part of the last record. 952 */ 953 m = sb->sb_mb; 954 if (m == NULL) { 955 sb->sb_mbtail = NULL; 956 sb->sb_lastrecord = NULL; 957 } else if (m->m_nextpkt == NULL) 958 sb->sb_lastrecord = m; 959 } 960 961 /* 962 * Drop a record off the front of a sockbuf 963 * and move the next record to the front. 964 */ 965 void 966 sbdroprecord(struct sockbuf *sb) 967 { 968 struct mbuf *m, *mn; 969 970 m = sb->sb_mb; 971 if (m) { 972 sb->sb_mb = m->m_nextpkt; 973 do { 974 sbfree(sb, m); 975 mn = m_free(m); 976 } while ((m = mn) != NULL); 977 } 978 SB_EMPTY_FIXUP(sb); 979 } 980 981 /* 982 * Create a "control" mbuf containing the specified data 983 * with the specified type for presentation on a socket buffer. 
984 */ 985 struct mbuf * 986 sbcreatecontrol(caddr_t p, int size, int type, int level) 987 { 988 struct cmsghdr *cp; 989 struct mbuf *m; 990 991 if (CMSG_SPACE(size) > MCLBYTES) { 992 printf("sbcreatecontrol: message too large %d\n", size); 993 return NULL; 994 } 995 996 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) 997 return (NULL); 998 if (CMSG_SPACE(size) > MLEN) { 999 MCLGET(m, M_DONTWAIT); 1000 if ((m->m_flags & M_EXT) == 0) { 1001 m_free(m); 1002 return NULL; 1003 } 1004 } 1005 cp = mtod(m, struct cmsghdr *); 1006 memset(cp, 0, CMSG_SPACE(size)); 1007 memcpy(CMSG_DATA(cp), p, size); 1008 m->m_len = CMSG_SPACE(size); 1009 cp->cmsg_len = CMSG_LEN(size); 1010 cp->cmsg_level = level; 1011 cp->cmsg_type = type; 1012 return (m); 1013 } 1014