1 /* 2 * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are permitted 6 * provided that the above copyright notice and this paragraph are 7 * duplicated in all such forms and that any documentation, 8 * advertising materials, and other materials related to such 9 * distribution and use acknowledge that the software was developed 10 * by the University of California, Berkeley. The name of the 11 * University may not be used to endorse or promote products derived 12 * from this software without specific prior written permission. 13 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 14 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 15 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16 * 17 * @(#)uipc_socket2.c 7.9 (Berkeley) 05/09/89 18 */ 19 20 #include "param.h" 21 #include "systm.h" 22 #include "user.h" 23 #include "proc.h" 24 #include "file.h" 25 #include "buf.h" 26 #include "malloc.h" 27 #include "mbuf.h" 28 #include "protosw.h" 29 #include "socket.h" 30 #include "socketvar.h" 31 32 /* 33 * Primitive routines for operating on sockets and socket buffers 34 */ 35 36 /* 37 * Procedures to manipulate state flags of socket 38 * and do appropriate wakeups. Normal sequence from the 39 * active (originating) side is that soisconnecting() is 40 * called during processing of connect() call, 41 * resulting in an eventual call to soisconnected() if/when the 42 * connection is established. When the connection is torn down 43 * soisdisconnecting() is called during processing of disconnect() call, 44 * and soisdisconnected() is called when the connection to the peer 45 * is totally severed. The semantics of these routines are such that 46 * connectionless protocols can call soisconnected() and soisdisconnected() 47 * only, bypassing the in-progress calls when setting up a ``connection'' 48 * takes no time. 49 * 50 * From the passive side, a socket is created with 51 * two queues of sockets: so_q0 for connections in progress 52 * and so_q for connections already made and awaiting user acceptance. 53 * As a protocol is preparing incoming connections, it creates a socket 54 * structure queued on so_q0 by calling sonewconn(). When the connection 55 * is established, soisconnected() is called, and transfers the 56 * socket structure to so_q, making it available to accept(). 57 * 58 * If a socket is closed with sockets on either 59 * so_q0 or so_q, these sockets are dropped. 60 * 61 * If higher level protocols are implemented in 62 * the kernel, the wakeups done here will sometimes 63 * cause software-interrupt process scheduling. 64 */ 65 66 soisconnecting(so) 67 register struct socket *so; 68 { 69 70 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 71 so->so_state |= SS_ISCONNECTING; 72 wakeup((caddr_t)&so->so_timeo); 73 } 74 75 soisconnected(so) 76 register struct socket *so; 77 { 78 register struct socket *head = so->so_head; 79 80 if (head) { 81 if (soqremque(so, 0) == 0) 82 panic("soisconnected"); 83 soqinsque(head, so, 1); 84 sorwakeup(head); 85 wakeup((caddr_t)&head->so_timeo); 86 } 87 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 88 so->so_state |= SS_ISCONNECTED; 89 wakeup((caddr_t)&so->so_timeo); 90 sorwakeup(so); 91 sowwakeup(so); 92 } 93 94 soisdisconnecting(so) 95 register struct socket *so; 96 { 97 98 so->so_state &= ~SS_ISCONNECTING; 99 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); 100 wakeup((caddr_t)&so->so_timeo); 101 sowwakeup(so); 102 sorwakeup(so); 103 } 104 105 soisdisconnected(so) 106 register struct socket *so; 107 { 108 109 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 110 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 111 wakeup((caddr_t)&so->so_timeo); 112 sowwakeup(so); 113 sorwakeup(so); 114 } 115 116 /* 117 * When an attempt at a new connection is noted on a socket 118 * which accepts connections, sonewconn is called. If the 119 * connection is possible (subject to space constraints, etc.) 120 * then we allocate a new structure, propoerly linked into the 121 * data structure of the original socket, and return this. 122 */ 123 struct socket * 124 sonewconn(head) 125 register struct socket *head; 126 { 127 register struct socket *so; 128 129 if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) 130 return ((struct socket *)0); 131 MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT); 132 if (so == NULL) 133 return ((struct socket *)0); 134 bzero((caddr_t)so, sizeof(*so)); 135 so->so_type = head->so_type; 136 so->so_options = head->so_options &~ SO_ACCEPTCONN; 137 so->so_linger = head->so_linger; 138 so->so_state = head->so_state | SS_NOFDREF; 139 so->so_proto = head->so_proto; 140 so->so_timeo = head->so_timeo; 141 so->so_pgid = head->so_pgid; 142 (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); 143 soqinsque(head, so, 0); 144 if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, 145 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) { 146 (void) soqremque(so, 0); 147 (void) free((caddr_t)so, M_SOCKET); 148 return ((struct socket *)0); 149 } 150 return (so); 151 } 152 153 soqinsque(head, so, q) 154 register struct socket *head, *so; 155 int q; 156 { 157 158 so->so_head = head; 159 if (q == 0) { 160 head->so_q0len++; 161 so->so_q0 = head->so_q0; 162 head->so_q0 = so; 163 } else { 164 head->so_qlen++; 165 so->so_q = head->so_q; 166 head->so_q = so; 167 } 168 } 169 170 soqremque(so, q) 171 register struct socket *so; 172 int q; 173 { 174 register struct socket *head, *prev, *next; 175 176 head = so->so_head; 177 prev = head; 178 for (;;) { 179 next = q ? prev->so_q : prev->so_q0; 180 if (next == so) 181 break; 182 if (next == head) 183 return (0); 184 prev = next; 185 } 186 if (q == 0) { 187 prev->so_q0 = next->so_q0; 188 head->so_q0len--; 189 } else { 190 prev->so_q = next->so_q; 191 head->so_qlen--; 192 } 193 next->so_q0 = next->so_q = 0; 194 next->so_head = 0; 195 return (1); 196 } 197 198 /* 199 * Socantsendmore indicates that no more data will be sent on the 200 * socket; it would normally be applied to a socket when the user 201 * informs the system that no more data is to be sent, by the protocol 202 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 203 * will be received, and will normally be applied to the socket by a 204 * protocol when it detects that the peer will send no more data. 205 * Data queued for reading in the socket may yet be read. 206 */ 207 208 socantsendmore(so) 209 struct socket *so; 210 { 211 212 so->so_state |= SS_CANTSENDMORE; 213 sowwakeup(so); 214 } 215 216 socantrcvmore(so) 217 struct socket *so; 218 { 219 220 so->so_state |= SS_CANTRCVMORE; 221 sorwakeup(so); 222 } 223 224 /* 225 * Socket select/wakeup routines. 226 */ 227 228 /* 229 * Queue a process for a select on a socket buffer. 230 */ 231 sbselqueue(sb) 232 struct sockbuf *sb; 233 { 234 struct proc *p; 235 236 if ((p = sb->sb_sel) && p->p_wchan == (caddr_t)&selwait) 237 sb->sb_flags |= SB_COLL; 238 else 239 sb->sb_sel = u.u_procp; 240 } 241 242 /* 243 * Wait for data to arrive at/drain from a socket buffer. 244 */ 245 sbwait(sb) 246 struct sockbuf *sb; 247 { 248 249 sb->sb_flags |= SB_WAIT; 250 sleep((caddr_t)&sb->sb_cc, PZERO+1); 251 } 252 253 /* 254 * Wakeup processes waiting on a socket buffer. 255 * Do asynchronous notification via SIGIO 256 * if the socket has the SS_ASYNC flag set. 257 */ 258 sowakeup(so, sb) 259 register struct socket *so; 260 register struct sockbuf *sb; 261 { 262 struct proc *p; 263 264 if (sb->sb_sel) { 265 selwakeup(sb->sb_sel, sb->sb_flags & SB_COLL); 266 sb->sb_sel = 0; 267 sb->sb_flags &= ~SB_COLL; 268 } 269 if (sb->sb_flags & SB_WAIT) { 270 sb->sb_flags &= ~SB_WAIT; 271 wakeup((caddr_t)&sb->sb_cc); 272 } 273 if (so->so_state & SS_ASYNC) { 274 if (so->so_pgid < 0) 275 gsignal(-so->so_pgid, SIGIO); 276 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) 277 psignal(p, SIGIO); 278 } 279 } 280 281 /* 282 * Socket buffer (struct sockbuf) utility routines. 283 * 284 * Each socket contains two socket buffers: one for sending data and 285 * one for receiving data. Each buffer contains a queue of mbufs, 286 * information about the number of mbufs and amount of data in the 287 * queue, and other fields allowing select() statements and notification 288 * on data availability to be implemented. 289 * 290 * Data stored in a socket buffer is maintained as a list of records. 291 * Each record is a list of mbufs chained together with the m_next 292 * field. Records are chained together with the m_nextpkt field. The upper 293 * level routine soreceive() expects the following conventions to be 294 * observed when placing information in the receive buffer: 295 * 296 * 1. If the protocol requires each message be preceded by the sender's 297 * name, then a record containing that name must be present before 298 * any associated data (mbuf's must be of type MT_SONAME). 299 * 2. If the protocol supports the exchange of ``access rights'' (really 300 * just additional data associated with the message), and there are 301 * ``rights'' to be received, then a record containing this data 302 * should be present (mbuf's must be of type MT_RIGHTS). 303 * 3. If a name or rights record exists, then it must be followed by 304 * a data record, perhaps of zero length. 305 * 306 * Before using a new socket structure it is first necessary to reserve 307 * buffer space to the socket, by calling sbreserve(). This should commit 308 * some of the available buffer space in the system buffer pool for the 309 * socket (currently, it does nothing but enforce limits). The space 310 * should be released by calling sbrelease() when the socket is destroyed. 311 */ 312 313 soreserve(so, sndcc, rcvcc) 314 register struct socket *so; 315 u_long sndcc, rcvcc; 316 { 317 318 if (sbreserve(&so->so_snd, sndcc) == 0) 319 goto bad; 320 if (sbreserve(&so->so_rcv, rcvcc) == 0) 321 goto bad2; 322 return (0); 323 bad2: 324 sbrelease(&so->so_snd); 325 bad: 326 return (ENOBUFS); 327 } 328 329 /* 330 * Allot mbufs to a sockbuf. 331 * Attempt to scale cc so that mbcnt doesn't become limiting 332 * if buffering efficiency is near the normal case. 333 */ 334 sbreserve(sb, cc) 335 struct sockbuf *sb; 336 u_long cc; 337 { 338 339 if (cc > (u_long)SB_MAX * MCLBYTES / (2 * MSIZE + MCLBYTES)) 340 return (0); 341 sb->sb_hiwat = cc; 342 sb->sb_mbmax = MIN(cc * 2, SB_MAX); 343 return (1); 344 } 345 346 /* 347 * Free mbufs held by a socket, and reserved mbuf space. 348 */ 349 sbrelease(sb) 350 struct sockbuf *sb; 351 { 352 353 sbflush(sb); 354 sb->sb_hiwat = sb->sb_mbmax = 0; 355 } 356 357 /* 358 * Routines to add and remove 359 * data from an mbuf queue. 360 * 361 * The routines sbappend() or sbappendrecord() are normally called to 362 * append new mbufs to a socket buffer, after checking that adequate 363 * space is available, comparing the function sbspace() with the amount 364 * of data to be added. sbappendrecord() differs from sbappend() in 365 * that data supplied is treated as the beginning of a new record. 366 * To place a sender's address, optional access rights, and data in a 367 * socket receive buffer, sbappendaddr() should be used. To place 368 * access rights and data in a socket receive buffer, sbappendrights() 369 * should be used. In either case, the new data begins a new record. 370 * Note that unlike sbappend() and sbappendrecord(), these routines check 371 * for the caller that there will be enough space to store the data. 372 * Each fails if there is not enough space, or if it cannot find mbufs 373 * to store additional information in. 374 * 375 * Reliable protocols may use the socket send buffer to hold data 376 * awaiting acknowledgement. Data is normally copied from a socket 377 * send buffer in a protocol with m_copy for output to a peer, 378 * and then removing the data from the socket buffer with sbdrop() 379 * or sbdroprecord() when the data is acknowledged by the peer. 380 */ 381 382 /* 383 * Append mbuf chain m to the last record in the 384 * socket buffer sb. The additional space associated 385 * the mbuf chain is recorded in sb. Empty mbufs are 386 * discarded and mbufs are compacted where possible. 387 */ 388 sbappend(sb, m) 389 struct sockbuf *sb; 390 struct mbuf *m; 391 { 392 register struct mbuf *n; 393 394 if (m == 0) 395 return; 396 if (n = sb->sb_mb) { 397 while (n->m_nextpkt) 398 n = n->m_nextpkt; 399 while (n->m_next) 400 n = n->m_next; 401 } 402 sbcompress(sb, m, n); 403 } 404 405 /* 406 * As above, except the mbuf chain 407 * begins a new record. 408 */ 409 sbappendrecord(sb, m0) 410 register struct sockbuf *sb; 411 register struct mbuf *m0; 412 { 413 register struct mbuf *m; 414 415 if (m0 == 0) 416 return; 417 if (m = sb->sb_mb) 418 while (m->m_nextpkt) 419 m = m->m_nextpkt; 420 /* 421 * Put the first mbuf on the queue. 422 * Note this permits zero length records. 423 */ 424 sballoc(sb, m0); 425 if (m) 426 m->m_nextpkt = m0; 427 else 428 sb->sb_mb = m0; 429 m = m0->m_next; 430 m0->m_next = 0; 431 if (m && (m0->m_flags & M_EOR)) { 432 m0->m_flags &= ~M_EOR; 433 m->m_flags |= M_EOR; 434 } 435 sbcompress(sb, m, m0); 436 } 437 438 /* 439 * As above except that OOB data 440 * is inserted at the beginning of the sockbuf, 441 * but after any other OOB data. 442 */ 443 sbinsertoob(sb, m0) 444 register struct sockbuf *sb; 445 register struct mbuf *m0; 446 { 447 register struct mbuf *m; 448 register struct mbuf **mp; 449 450 if (m0 == 0) 451 return; 452 for (mp = &sb->sb_mb; m = *mp; mp = &((*mp)->m_nextpkt)) { 453 again: 454 switch (m->m_type) { 455 456 case MT_OOBDATA: 457 continue; /* WANT next train */ 458 459 case MT_CONTROL: 460 if (m = m->m_next) 461 goto again; /* inspect THIS train further */ 462 } 463 break; 464 } 465 /* 466 * Put the first mbuf on the queue. 467 * Note this permits zero length records. 468 */ 469 sballoc(sb, m0); 470 m0->m_nextpkt = *mp; 471 *mp = m0; 472 m = m0->m_next; 473 m0->m_next = 0; 474 if (m && (m0->m_flags & M_EOR)) { 475 m0->m_flags &= ~M_EOR; 476 m->m_flags |= M_EOR; 477 } 478 sbcompress(sb, m, m0); 479 } 480 481 /* 482 * Append address and data, and optionally, rights 483 * to the receive queue of a socket. If present, 484 * m0 Return 0 if 485 * no space in sockbuf or insufficient mbufs. 486 */ 487 sbappendaddr(sb, asa, m0, rights0) 488 register struct sockbuf *sb; 489 struct sockaddr *asa; 490 struct mbuf *m0, *rights0; 491 { 492 register struct mbuf *m, *n; 493 int space = asa->sa_len; 494 495 if (m0 && (m0->m_flags & M_PKTHDR) == 0) 496 panic("sbappendaddr"); 497 if (m0) 498 space += m0->m_pkthdr.len; 499 if (rights0) 500 space += rights0->m_len; 501 if (space > sbspace(sb)) 502 return (0); 503 MGET(m, M_DONTWAIT, MT_SONAME); 504 if (m == 0) 505 return (0); 506 if (asa->sa_len > MLEN) { 507 (void) m_free(m); 508 return (0); 509 } 510 m->m_len = asa->sa_len; 511 bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len); 512 if (rights0 && rights0->m_len) { 513 m->m_next = m_copy(rights0, 0, rights0->m_len); 514 if (m->m_next == 0) { 515 m_freem(m); 516 return (0); 517 } 518 sballoc(sb, m->m_next); 519 } 520 sballoc(sb, m); 521 if (n = sb->sb_mb) { 522 while (n->m_nextpkt) 523 n = n->m_nextpkt; 524 n->m_nextpkt = m; 525 } else 526 sb->sb_mb = m; 527 if (m->m_next) 528 m = m->m_next; 529 if (m0) 530 sbcompress(sb, m0, m); 531 return (1); 532 } 533 534 sbappendrights(sb, m0, rights) 535 struct sockbuf *sb; 536 struct mbuf *rights, *m0; 537 { 538 register struct mbuf *m, *n; 539 int space = 0; 540 541 if (rights == 0) 542 panic("sbappendrights"); 543 for (m = m0; m; m = m->m_next) 544 space += m->m_len; 545 space += rights->m_len; 546 if (space > sbspace(sb)) 547 return (0); 548 m = m_copy(rights, 0, rights->m_len); 549 if (m == 0) 550 return (0); 551 sballoc(sb, m); 552 if (n = sb->sb_mb) { 553 while (n->m_nextpkt) 554 n = n->m_nextpkt; 555 n->m_nextpkt = m; 556 } else 557 sb->sb_mb = m; 558 if (m0) 559 sbcompress(sb, m0, m); 560 return (1); 561 } 562 563 /* 564 * Compress mbuf chain m into the socket 565 * buffer sb following mbuf n. If n 566 * is null, the buffer is presumed empty. 567 */ 568 sbcompress(sb, m, n) 569 register struct sockbuf *sb; 570 register struct mbuf *m, *n; 571 { 572 573 register int eor = 0; 574 while (m) { 575 eor |= m->m_flags & M_EOR; 576 if (m->m_len == 0) { 577 m = m_free(m); 578 continue; 579 } 580 if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 && 581 (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] && 582 n->m_type == m->m_type) { 583 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, 584 (unsigned)m->m_len); 585 n->m_len += m->m_len; 586 sb->sb_cc += m->m_len; 587 m = m_free(m); 588 continue; 589 } 590 if (n) 591 n->m_next = m; 592 else 593 sb->sb_mb = m; 594 sballoc(sb, m); 595 n = m; 596 m->m_flags &= ~M_EOR; 597 m = m->m_next; 598 n->m_next = 0; 599 } 600 if (n) 601 n->m_flags |= eor; 602 } 603 604 /* 605 * Free all mbufs in a sockbuf. 606 * Check that all resources are reclaimed. 607 */ 608 sbflush(sb) 609 register struct sockbuf *sb; 610 { 611 612 if (sb->sb_flags & SB_LOCK) 613 panic("sbflush"); 614 while (sb->sb_mbcnt) 615 sbdrop(sb, (int)sb->sb_cc); 616 if (sb->sb_cc || sb->sb_mbcnt || sb->sb_mb) 617 panic("sbflush 2"); 618 } 619 620 /* 621 * Drop data from (the front of) a sockbuf. 622 */ 623 sbdrop(sb, len) 624 register struct sockbuf *sb; 625 register int len; 626 { 627 register struct mbuf *m, *mn; 628 struct mbuf *next; 629 630 next = (m = sb->sb_mb) ? m->m_nextpkt : 0; 631 while (len > 0) { 632 if (m == 0) { 633 if (next == 0) 634 panic("sbdrop"); 635 m = next; 636 next = m->m_nextpkt; 637 continue; 638 } 639 if (m->m_len > len) { 640 m->m_len -= len; 641 m->m_data += len; 642 sb->sb_cc -= len; 643 break; 644 } 645 len -= m->m_len; 646 sbfree(sb, m); 647 MFREE(m, mn); 648 m = mn; 649 } 650 while (m && m->m_len == 0) { 651 sbfree(sb, m); 652 MFREE(m, mn); 653 m = mn; 654 } 655 if (m) { 656 sb->sb_mb = m; 657 m->m_nextpkt = next; 658 } else 659 sb->sb_mb = next; 660 } 661 662 /* 663 * Drop a record off the front of a sockbuf 664 * and move the next record to the front. 665 */ 666 sbdroprecord(sb) 667 register struct sockbuf *sb; 668 { 669 register struct mbuf *m, *mn; 670 671 m = sb->sb_mb; 672 if (m) { 673 sb->sb_mb = m->m_nextpkt; 674 do { 675 sbfree(sb, m); 676 MFREE(m, mn); 677 } while (m = mn); 678 } 679 } 680