1 /* 2 * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are permitted 6 * provided that the above copyright notice and this paragraph are 7 * duplicated in all such forms and that any documentation, 8 * advertising materials, and other materials related to such 9 * distribution and use acknowledge that the software was developed 10 * by the University of California, Berkeley. The name of the 11 * University may not be used to endorse or promote products derived 12 * from this software without specific prior written permission. 13 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 14 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 15 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16 * 17 * @(#)uipc_socket2.c 7.7 (Berkeley) 10/12/88 18 */ 19 20 #include "param.h" 21 #include "systm.h" 22 #include "dir.h" 23 #include "user.h" 24 #include "proc.h" 25 #include "file.h" 26 #include "inode.h" 27 #include "buf.h" 28 #include "malloc.h" 29 #include "mbuf.h" 30 #include "protosw.h" 31 #include "socket.h" 32 #include "socketvar.h" 33 34 /* 35 * Primitive routines for operating on sockets and socket buffers 36 */ 37 38 /* 39 * Procedures to manipulate state flags of socket 40 * and do appropriate wakeups. Normal sequence from the 41 * active (originating) side is that soisconnecting() is 42 * called during processing of connect() call, 43 * resulting in an eventual call to soisconnected() if/when the 44 * connection is established. When the connection is torn down 45 * soisdisconnecting() is called during processing of disconnect() call, 46 * and soisdisconnected() is called when the connection to the peer 47 * is totally severed. The semantics of these routines are such that 48 * connectionless protocols can call soisconnected() and soisdisconnected() 49 * only, bypassing the in-progress calls when setting up a ``connection'' 50 * takes no time. 51 * 52 * From the passive side, a socket is created with 53 * two queues of sockets: so_q0 for connections in progress 54 * and so_q for connections already made and awaiting user acceptance. 55 * As a protocol is preparing incoming connections, it creates a socket 56 * structure queued on so_q0 by calling sonewconn(). When the connection 57 * is established, soisconnected() is called, and transfers the 58 * socket structure to so_q, making it available to accept(). 59 * 60 * If a socket is closed with sockets on either 61 * so_q0 or so_q, these sockets are dropped. 62 * 63 * If higher level protocols are implemented in 64 * the kernel, the wakeups done here will sometimes 65 * cause software-interrupt process scheduling. 66 */ 67 68 soisconnecting(so) 69 register struct socket *so; 70 { 71 72 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 73 so->so_state |= SS_ISCONNECTING; 74 wakeup((caddr_t)&so->so_timeo); 75 } 76 77 soisconnected(so) 78 register struct socket *so; 79 { 80 register struct socket *head = so->so_head; 81 82 if (head) { 83 if (soqremque(so, 0) == 0) 84 panic("soisconnected"); 85 soqinsque(head, so, 1); 86 sorwakeup(head); 87 wakeup((caddr_t)&head->so_timeo); 88 } 89 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 90 so->so_state |= SS_ISCONNECTED; 91 wakeup((caddr_t)&so->so_timeo); 92 sorwakeup(so); 93 sowwakeup(so); 94 } 95 96 soisdisconnecting(so) 97 register struct socket *so; 98 { 99 100 so->so_state &= ~SS_ISCONNECTING; 101 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); 102 wakeup((caddr_t)&so->so_timeo); 103 sowwakeup(so); 104 sorwakeup(so); 105 } 106 107 soisdisconnected(so) 108 register struct socket *so; 109 { 110 111 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 112 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 113 wakeup((caddr_t)&so->so_timeo); 114 sowwakeup(so); 115 sorwakeup(so); 116 } 117 118 /* 119 * When an attempt at a new connection is noted on a socket 120 * which accepts connections, sonewconn is called. If the 121 * connection is possible (subject to space constraints, etc.) 122 * then we allocate a new structure, propoerly linked into the 123 * data structure of the original socket, and return this. 124 */ 125 struct socket * 126 sonewconn(head) 127 register struct socket *head; 128 { 129 register struct socket *so; 130 register struct mbuf *m; 131 132 if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) 133 goto bad; 134 m = m_getclr(M_DONTWAIT, MT_SOCKET); 135 if (m == NULL) 136 goto bad; 137 so = mtod(m, struct socket *); 138 so->so_type = head->so_type; 139 so->so_options = head->so_options &~ SO_ACCEPTCONN; 140 so->so_linger = head->so_linger; 141 so->so_state = head->so_state | SS_NOFDREF; 142 so->so_proto = head->so_proto; 143 so->so_timeo = head->so_timeo; 144 so->so_pgid = head->so_pgid; 145 (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); 146 soqinsque(head, so, 0); 147 if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, 148 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) { 149 (void) soqremque(so, 0); 150 (void) m_free(m); 151 goto bad; 152 } 153 return (so); 154 bad: 155 return ((struct socket *)0); 156 } 157 158 soqinsque(head, so, q) 159 register struct socket *head, *so; 160 int q; 161 { 162 163 so->so_head = head; 164 if (q == 0) { 165 head->so_q0len++; 166 so->so_q0 = head->so_q0; 167 head->so_q0 = so; 168 } else { 169 head->so_qlen++; 170 so->so_q = head->so_q; 171 head->so_q = so; 172 } 173 } 174 175 soqremque(so, q) 176 register struct socket *so; 177 int q; 178 { 179 register struct socket *head, *prev, *next; 180 181 head = so->so_head; 182 prev = head; 183 for (;;) { 184 next = q ? prev->so_q : prev->so_q0; 185 if (next == so) 186 break; 187 if (next == head) 188 return (0); 189 prev = next; 190 } 191 if (q == 0) { 192 prev->so_q0 = next->so_q0; 193 head->so_q0len--; 194 } else { 195 prev->so_q = next->so_q; 196 head->so_qlen--; 197 } 198 next->so_q0 = next->so_q = 0; 199 next->so_head = 0; 200 return (1); 201 } 202 203 /* 204 * Socantsendmore indicates that no more data will be sent on the 205 * socket; it would normally be applied to a socket when the user 206 * informs the system that no more data is to be sent, by the protocol 207 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 208 * will be received, and will normally be applied to the socket by a 209 * protocol when it detects that the peer will send no more data. 210 * Data queued for reading in the socket may yet be read. 211 */ 212 213 socantsendmore(so) 214 struct socket *so; 215 { 216 217 so->so_state |= SS_CANTSENDMORE; 218 sowwakeup(so); 219 } 220 221 socantrcvmore(so) 222 struct socket *so; 223 { 224 225 so->so_state |= SS_CANTRCVMORE; 226 sorwakeup(so); 227 } 228 229 /* 230 * Socket select/wakeup routines. 231 */ 232 233 /* 234 * Queue a process for a select on a socket buffer. 235 */ 236 sbselqueue(sb) 237 struct sockbuf *sb; 238 { 239 struct proc *p; 240 241 if ((p = sb->sb_sel) && p->p_wchan == (caddr_t)&selwait) 242 sb->sb_flags |= SB_COLL; 243 else 244 sb->sb_sel = u.u_procp; 245 } 246 247 /* 248 * Wait for data to arrive at/drain from a socket buffer. 249 */ 250 sbwait(sb) 251 struct sockbuf *sb; 252 { 253 254 sb->sb_flags |= SB_WAIT; 255 sleep((caddr_t)&sb->sb_cc, PZERO+1); 256 } 257 258 /* 259 * Wakeup processes waiting on a socket buffer. 260 * Do asynchronous notification via SIGIO 261 * if the socket has the SS_ASYNC flag set. 262 */ 263 sowakeup(so, sb) 264 register struct socket *so; 265 register struct sockbuf *sb; 266 { 267 register struct proc *p; 268 269 if (sb->sb_sel) { 270 selwakeup(sb->sb_sel, sb->sb_flags & SB_COLL); 271 sb->sb_sel = 0; 272 sb->sb_flags &= ~SB_COLL; 273 } 274 if (sb->sb_flags & SB_WAIT) { 275 sb->sb_flags &= ~SB_WAIT; 276 wakeup((caddr_t)&sb->sb_cc); 277 } 278 if (so->so_state & SS_ASYNC) { 279 if (so->so_pgid < 0) 280 gsignal(-so->so_pgid, SIGIO); 281 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) 282 psignal(p, SIGIO); 283 } 284 } 285 286 /* 287 * Socket buffer (struct sockbuf) utility routines. 288 * 289 * Each socket contains two socket buffers: one for sending data and 290 * one for receiving data. Each buffer contains a queue of mbufs, 291 * information about the number of mbufs and amount of data in the 292 * queue, and other fields allowing select() statements and notification 293 * on data availability to be implemented. 294 * 295 * Data stored in a socket buffer is maintained as a list of records. 296 * Each record is a list of mbufs chained together with the m_next 297 * field. Records are chained together with the m_nextpkt field. The upper 298 * level routine soreceive() expects the following conventions to be 299 * observed when placing information in the receive buffer: 300 * 301 * 1. If the protocol requires each message be preceded by the sender's 302 * name, then a record containing that name must be present before 303 * any associated data (mbuf's must be of type MT_SONAME). 304 * 2. If the protocol supports the exchange of ``access rights'' (really 305 * just additional data associated with the message), and there are 306 * ``rights'' to be received, then a record containing this data 307 * should be present (mbuf's must be of type MT_RIGHTS). 308 * 3. If a name or rights record exists, then it must be followed by 309 * a data record, perhaps of zero length. 310 * 311 * Before using a new socket structure it is first necessary to reserve 312 * buffer space to the socket, by calling sbreserve(). This should commit 313 * some of the available buffer space in the system buffer pool for the 314 * socket (currently, it does nothing but enforce limits). The space 315 * should be released by calling sbrelease() when the socket is destroyed. 316 */ 317 318 soreserve(so, sndcc, rcvcc) 319 register struct socket *so; 320 u_long sndcc, rcvcc; 321 { 322 323 if (sbreserve(&so->so_snd, sndcc) == 0) 324 goto bad; 325 if (sbreserve(&so->so_rcv, rcvcc) == 0) 326 goto bad2; 327 return (0); 328 bad2: 329 sbrelease(&so->so_snd); 330 bad: 331 return (ENOBUFS); 332 } 333 334 /* 335 * Allot mbufs to a sockbuf. 336 * Attempt to scale cc so that mbcnt doesn't become limiting 337 * if buffering efficiency is near the normal case. 338 */ 339 sbreserve(sb, cc) 340 struct sockbuf *sb; 341 u_long cc; 342 { 343 344 if (cc > (u_long)SB_MAX * MCLBYTES / (2 * MSIZE + MCLBYTES)) 345 return (0); 346 sb->sb_hiwat = cc; 347 sb->sb_mbmax = MIN(cc * 2, SB_MAX); 348 return (1); 349 } 350 351 /* 352 * Free mbufs held by a socket, and reserved mbuf space. 353 */ 354 sbrelease(sb) 355 struct sockbuf *sb; 356 { 357 358 sbflush(sb); 359 sb->sb_hiwat = sb->sb_mbmax = 0; 360 } 361 362 /* 363 * Routines to add and remove 364 * data from an mbuf queue. 365 * 366 * The routines sbappend() or sbappendrecord() are normally called to 367 * append new mbufs to a socket buffer, after checking that adequate 368 * space is available, comparing the function sbspace() with the amount 369 * of data to be added. sbappendrecord() differs from sbappend() in 370 * that data supplied is treated as the beginning of a new record. 371 * To place a sender's address, optional access rights, and data in a 372 * socket receive buffer, sbappendaddr() should be used. To place 373 * access rights and data in a socket receive buffer, sbappendrights() 374 * should be used. In either case, the new data begins a new record. 375 * Note that unlike sbappend() and sbappendrecord(), these routines check 376 * for the caller that there will be enough space to store the data. 377 * Each fails if there is not enough space, or if it cannot find mbufs 378 * to store additional information in. 379 * 380 * Reliable protocols may use the socket send buffer to hold data 381 * awaiting acknowledgement. Data is normally copied from a socket 382 * send buffer in a protocol with m_copy for output to a peer, 383 * and then removing the data from the socket buffer with sbdrop() 384 * or sbdroprecord() when the data is acknowledged by the peer. 385 */ 386 387 /* 388 * Append mbuf chain m to the last record in the 389 * socket buffer sb. The additional space associated 390 * the mbuf chain is recorded in sb. Empty mbufs are 391 * discarded and mbufs are compacted where possible. 392 */ 393 sbappend(sb, m) 394 struct sockbuf *sb; 395 struct mbuf *m; 396 { 397 register struct mbuf *n; 398 399 if (m == 0) 400 return; 401 if (n = sb->sb_mb) { 402 while (n->m_nextpkt) 403 n = n->m_nextpkt; 404 while (n->m_next) 405 n = n->m_next; 406 } 407 sbcompress(sb, m, n); 408 } 409 410 /* 411 * As above, except the mbuf chain 412 * begins a new record. 413 */ 414 sbappendrecord(sb, m0) 415 register struct sockbuf *sb; 416 register struct mbuf *m0; 417 { 418 register struct mbuf *m; 419 420 if (m0 == 0) 421 return; 422 if (m = sb->sb_mb) 423 while (m->m_nextpkt) 424 m = m->m_nextpkt; 425 /* 426 * Put the first mbuf on the queue. 427 * Note this permits zero length records. 428 */ 429 sballoc(sb, m0); 430 if (m) 431 m->m_nextpkt = m0; 432 else 433 sb->sb_mb = m0; 434 m = m0->m_next; 435 m0->m_next = 0; 436 sbcompress(sb, m, m0); 437 } 438 439 /* 440 * Append address and data, and optionally, rights 441 * to the receive queue of a socket. If present, 442 * m0 Return 0 if 443 * no space in sockbuf or insufficient mbufs. 444 */ 445 sbappendaddr(sb, asa, m0, rights0) 446 register struct sockbuf *sb; 447 struct sockaddr *asa; 448 struct mbuf *m0, *rights0; 449 { 450 register struct mbuf *m, *n; 451 int space = sizeof (*asa); 452 453 if (m0 && (m0->m_flags & M_PKTHDR) == 0) 454 panic("sbappendaddr"); 455 if (m0) 456 space += m0->m_pkthdr.len; 457 if (rights0) 458 space += rights0->m_len; 459 if (space > sbspace(sb)) 460 return (0); 461 MGET(m, M_DONTWAIT, MT_SONAME); 462 if (m == 0) 463 return (0); 464 *mtod(m, struct sockaddr *) = *asa; 465 m->m_len = sizeof (*asa); 466 if (rights0 && rights0->m_len) { 467 m->m_next = m_copy(rights0, 0, rights0->m_len); 468 if (m->m_next == 0) { 469 m_freem(m); 470 return (0); 471 } 472 sballoc(sb, m->m_next); 473 } 474 sballoc(sb, m); 475 if (n = sb->sb_mb) { 476 while (n->m_nextpkt) 477 n = n->m_nextpkt; 478 n->m_nextpkt = m; 479 } else 480 sb->sb_mb = m; 481 if (m->m_next) 482 m = m->m_next; 483 if (m0) 484 sbcompress(sb, m0, m); 485 return (1); 486 } 487 488 sbappendrights(sb, m0, rights) 489 struct sockbuf *sb; 490 struct mbuf *rights, *m0; 491 { 492 register struct mbuf *m, *n; 493 int space = 0; 494 495 if (rights == 0) 496 panic("sbappendrights"); 497 for (m = m0; m; m = m->m_next) 498 space += m->m_len; 499 space += rights->m_len; 500 if (space > sbspace(sb)) 501 return (0); 502 m = m_copy(rights, 0, rights->m_len); 503 if (m == 0) 504 return (0); 505 sballoc(sb, m); 506 if (n = sb->sb_mb) { 507 while (n->m_nextpkt) 508 n = n->m_nextpkt; 509 n->m_nextpkt = m; 510 } else 511 sb->sb_mb = m; 512 if (m0) 513 sbcompress(sb, m0, m); 514 return (1); 515 } 516 517 /* 518 * Compress mbuf chain m into the socket 519 * buffer sb following mbuf n. If n 520 * is null, the buffer is presumed empty. 521 */ 522 sbcompress(sb, m, n) 523 register struct sockbuf *sb; 524 register struct mbuf *m, *n; 525 { 526 527 while (m) { 528 if (m->m_len == 0) { 529 m = m_free(m); 530 continue; 531 } 532 if (n && (n->m_flags & M_EXT) == 0 && 533 (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] && 534 n->m_type == m->m_type) { 535 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, 536 (unsigned)m->m_len); 537 n->m_len += m->m_len; 538 sb->sb_cc += m->m_len; 539 m = m_free(m); 540 continue; 541 } 542 sballoc(sb, m); 543 if (n) 544 n->m_next = m; 545 else 546 sb->sb_mb = m; 547 n = m; 548 m = m->m_next; 549 n->m_next = 0; 550 } 551 } 552 553 /* 554 * Free all mbufs in a sockbuf. 555 * Check that all resources are reclaimed. 556 */ 557 sbflush(sb) 558 register struct sockbuf *sb; 559 { 560 561 if (sb->sb_flags & SB_LOCK) 562 panic("sbflush"); 563 while (sb->sb_mbcnt) 564 sbdrop(sb, (int)sb->sb_cc); 565 if (sb->sb_cc || sb->sb_mbcnt || sb->sb_mb) 566 panic("sbflush 2"); 567 } 568 569 /* 570 * Drop data from (the front of) a sockbuf. 571 */ 572 sbdrop(sb, len) 573 register struct sockbuf *sb; 574 register int len; 575 { 576 register struct mbuf *m, *mn; 577 struct mbuf *next; 578 579 next = (m = sb->sb_mb) ? m->m_nextpkt : 0; 580 while (len > 0) { 581 if (m == 0) { 582 if (next == 0) 583 panic("sbdrop"); 584 m = next; 585 next = m->m_nextpkt; 586 continue; 587 } 588 if (m->m_len > len) { 589 m->m_len -= len; 590 m->m_data += len; 591 sb->sb_cc -= len; 592 break; 593 } 594 len -= m->m_len; 595 sbfree(sb, m); 596 MFREE(m, mn); 597 m = mn; 598 } 599 while (m && m->m_len == 0) { 600 sbfree(sb, m); 601 MFREE(m, mn); 602 m = mn; 603 } 604 if (m) { 605 sb->sb_mb = m; 606 m->m_nextpkt = next; 607 } else 608 sb->sb_mb = next; 609 } 610 611 /* 612 * Drop a record off the front of a sockbuf 613 * and move the next record to the front. 614 */ 615 sbdroprecord(sb) 616 register struct sockbuf *sb; 617 { 618 register struct mbuf *m, *mn; 619 620 m = sb->sb_mb; 621 if (m) { 622 sb->sb_mb = m->m_nextpkt; 623 do { 624 sbfree(sb, m); 625 MFREE(m, mn); 626 } while (m = mn); 627 } 628 } 629