/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that this notice is preserved and that due credit is given
 * to the University of California at Berkeley. The name of the University
 * may not be used to endorse or promote products derived from this
 * software without specific prior written permission. This software
 * is provided ``as is'' without express or implied warranty.
 *
 *	@(#)uipc_socket2.c	7.2 (Berkeley) 12/30/87
 */

#include "param.h"
#include "systm.h"
#include "dir.h"
#include "user.h"
#include "proc.h"
#include "file.h"
#include "inode.h"
#include "buf.h"
#include "mbuf.h"
#include "protosw.h"
#include "socket.h"
#include "socketvar.h"

/*
 * Primitive routines for operating on sockets and socket buffers
 */

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups. Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established. When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed. The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
49 * As a protocol is preparing incoming connections, it creates a socket 50 * structure queued on so_q0 by calling sonewconn(). When the connection 51 * is established, soisconnected() is called, and transfers the 52 * socket structure to so_q, making it available to accept(). 53 * 54 * If a socket is closed with sockets on either 55 * so_q0 or so_q, these sockets are dropped. 56 * 57 * If higher level protocols are implemented in 58 * the kernel, the wakeups done here will sometimes 59 * cause software-interrupt process scheduling. 60 */ 61 62 soisconnecting(so) 63 register struct socket *so; 64 { 65 66 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 67 so->so_state |= SS_ISCONNECTING; 68 wakeup((caddr_t)&so->so_timeo); 69 } 70 71 soisconnected(so) 72 register struct socket *so; 73 { 74 register struct socket *head = so->so_head; 75 76 if (head) { 77 if (soqremque(so, 0) == 0) 78 panic("soisconnected"); 79 soqinsque(head, so, 1); 80 sorwakeup(head); 81 wakeup((caddr_t)&head->so_timeo); 82 } 83 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 84 so->so_state |= SS_ISCONNECTED; 85 wakeup((caddr_t)&so->so_timeo); 86 sorwakeup(so); 87 sowwakeup(so); 88 } 89 90 soisdisconnecting(so) 91 register struct socket *so; 92 { 93 94 so->so_state &= ~SS_ISCONNECTING; 95 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); 96 wakeup((caddr_t)&so->so_timeo); 97 sowwakeup(so); 98 sorwakeup(so); 99 } 100 101 soisdisconnected(so) 102 register struct socket *so; 103 { 104 105 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 106 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 107 wakeup((caddr_t)&so->so_timeo); 108 sowwakeup(so); 109 sorwakeup(so); 110 } 111 112 /* 113 * When an attempt at a new connection is noted on a socket 114 * which accepts connections, sonewconn is called. If the 115 * connection is possible (subject to space constraints, etc.) 
116 * then we allocate a new structure, propoerly linked into the 117 * data structure of the original socket, and return this. 118 */ 119 struct socket * 120 sonewconn(head) 121 register struct socket *head; 122 { 123 register struct socket *so; 124 register struct mbuf *m; 125 126 if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) 127 goto bad; 128 m = m_getclr(M_DONTWAIT, MT_SOCKET); 129 if (m == NULL) 130 goto bad; 131 so = mtod(m, struct socket *); 132 so->so_type = head->so_type; 133 so->so_options = head->so_options &~ SO_ACCEPTCONN; 134 so->so_linger = head->so_linger; 135 so->so_state = head->so_state | SS_NOFDREF; 136 so->so_proto = head->so_proto; 137 so->so_timeo = head->so_timeo; 138 so->so_pgrp = head->so_pgrp; 139 soqinsque(head, so, 0); 140 if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, 141 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) { 142 (void) soqremque(so, 0); 143 (void) m_free(m); 144 goto bad; 145 } 146 return (so); 147 bad: 148 return ((struct socket *)0); 149 } 150 151 soqinsque(head, so, q) 152 register struct socket *head, *so; 153 int q; 154 { 155 156 so->so_head = head; 157 if (q == 0) { 158 head->so_q0len++; 159 so->so_q0 = head->so_q0; 160 head->so_q0 = so; 161 } else { 162 head->so_qlen++; 163 so->so_q = head->so_q; 164 head->so_q = so; 165 } 166 } 167 168 soqremque(so, q) 169 register struct socket *so; 170 int q; 171 { 172 register struct socket *head, *prev, *next; 173 174 head = so->so_head; 175 prev = head; 176 for (;;) { 177 next = q ? 
prev->so_q : prev->so_q0; 178 if (next == so) 179 break; 180 if (next == head) 181 return (0); 182 prev = next; 183 } 184 if (q == 0) { 185 prev->so_q0 = next->so_q0; 186 head->so_q0len--; 187 } else { 188 prev->so_q = next->so_q; 189 head->so_qlen--; 190 } 191 next->so_q0 = next->so_q = 0; 192 next->so_head = 0; 193 return (1); 194 } 195 196 /* 197 * Socantsendmore indicates that no more data will be sent on the 198 * socket; it would normally be applied to a socket when the user 199 * informs the system that no more data is to be sent, by the protocol 200 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 201 * will be received, and will normally be applied to the socket by a 202 * protocol when it detects that the peer will send no more data. 203 * Data queued for reading in the socket may yet be read. 204 */ 205 206 socantsendmore(so) 207 struct socket *so; 208 { 209 210 so->so_state |= SS_CANTSENDMORE; 211 sowwakeup(so); 212 } 213 214 socantrcvmore(so) 215 struct socket *so; 216 { 217 218 so->so_state |= SS_CANTRCVMORE; 219 sorwakeup(so); 220 } 221 222 /* 223 * Socket select/wakeup routines. 224 */ 225 226 /* 227 * Queue a process for a select on a socket buffer. 228 */ 229 sbselqueue(sb) 230 struct sockbuf *sb; 231 { 232 register struct proc *p; 233 234 if ((p = sb->sb_sel) && p->p_wchan == (caddr_t)&selwait) 235 sb->sb_flags |= SB_COLL; 236 else 237 sb->sb_sel = u.u_procp; 238 } 239 240 /* 241 * Wait for data to arrive at/drain from a socket buffer. 242 */ 243 sbwait(sb) 244 struct sockbuf *sb; 245 { 246 247 sb->sb_flags |= SB_WAIT; 248 sleep((caddr_t)&sb->sb_cc, PZERO+1); 249 } 250 251 /* 252 * Wakeup processes waiting on a socket buffer. 
253 */ 254 sbwakeup(sb) 255 register struct sockbuf *sb; 256 { 257 258 if (sb->sb_sel) { 259 selwakeup(sb->sb_sel, sb->sb_flags & SB_COLL); 260 sb->sb_sel = 0; 261 sb->sb_flags &= ~SB_COLL; 262 } 263 if (sb->sb_flags & SB_WAIT) { 264 sb->sb_flags &= ~SB_WAIT; 265 wakeup((caddr_t)&sb->sb_cc); 266 } 267 } 268 269 /* 270 * Wakeup socket readers and writers. 271 * Do asynchronous notification via SIGIO 272 * if the socket has the SS_ASYNC flag set. 273 */ 274 sowakeup(so, sb) 275 register struct socket *so; 276 struct sockbuf *sb; 277 { 278 register struct proc *p; 279 280 sbwakeup(sb); 281 if (so->so_state & SS_ASYNC) { 282 if (so->so_pgrp < 0) 283 gsignal(-so->so_pgrp, SIGIO); 284 else if (so->so_pgrp > 0 && (p = pfind(so->so_pgrp)) != 0) 285 psignal(p, SIGIO); 286 } 287 } 288 289 /* 290 * Socket buffer (struct sockbuf) utility routines. 291 * 292 * Each socket contains two socket buffers: one for sending data and 293 * one for receiving data. Each buffer contains a queue of mbufs, 294 * information about the number of mbufs and amount of data in the 295 * queue, and other fields allowing select() statements and notification 296 * on data availability to be implemented. 297 * 298 * Data stored in a socket buffer is maintained as a list of records. 299 * Each record is a list of mbufs chained together with the m_next 300 * field. Records are chained together with the m_act field. The upper 301 * level routine soreceive() expects the following conventions to be 302 * observed when placing information in the receive buffer: 303 * 304 * 1. If the protocol requires each message be preceded by the sender's 305 * name, then a record containing that name must be present before 306 * any associated data (mbuf's must be of type MT_SONAME). 307 * 2. 
If the protocol supports the exchange of ``access rights'' (really 308 * just additional data associated with the message), and there are 309 * ``rights'' to be received, then a record containing this data 310 * should be present (mbuf's must be of type MT_RIGHTS). 311 * 3. If a name or rights record exists, then it must be followed by 312 * a data record, perhaps of zero length. 313 * 314 * Before using a new socket structure it is first necessary to reserve 315 * buffer space to the socket, by calling sbreserve(). This commits 316 * some of the available buffer space in the system buffer pool for the 317 * socket. The space should be released by calling sbrelease() when the 318 * socket is destroyed. 319 */ 320 321 soreserve(so, sndcc, rcvcc) 322 register struct socket *so; 323 int sndcc, rcvcc; 324 { 325 326 if (sbreserve(&so->so_snd, sndcc) == 0) 327 goto bad; 328 if (sbreserve(&so->so_rcv, rcvcc) == 0) 329 goto bad2; 330 return (0); 331 bad2: 332 sbrelease(&so->so_snd); 333 bad: 334 return (ENOBUFS); 335 } 336 337 /* 338 * Allot mbufs to a sockbuf. 339 * Attempt to scale cc so that mbcnt doesn't become limiting 340 * if buffering efficiency is near the normal case. 341 */ 342 sbreserve(sb, cc) 343 struct sockbuf *sb; 344 { 345 346 if ((unsigned) cc > (unsigned)SB_MAX * CLBYTES / (2 * MSIZE + CLBYTES)) 347 return (0); 348 sb->sb_hiwat = cc; 349 sb->sb_mbmax = MIN(cc * 2, SB_MAX); 350 return (1); 351 } 352 353 /* 354 * Free mbufs held by a socket, and reserved mbuf space. 355 */ 356 sbrelease(sb) 357 struct sockbuf *sb; 358 { 359 360 sbflush(sb); 361 sb->sb_hiwat = sb->sb_mbmax = 0; 362 } 363 364 /* 365 * Routines to add and remove 366 * data from an mbuf queue. 367 * 368 * The routines sbappend() or sbappendrecord() are normally called to 369 * append new mbufs to a socket buffer, after checking that adequate 370 * space is available, comparing the function sbspace() with the amount 371 * of data to be added. 
sbappendrecord() differs from sbappend() in 372 * that data supplied is treated as the beginning of a new record. 373 * To place a sender's address, optional access rights, and data in a 374 * socket receive buffer, sbappendaddr() should be used. To place 375 * access rights and data in a socket receive buffer, sbappendrights() 376 * should be used. In either case, the new data begins a new record. 377 * Note that unlike sbappend() and sbappendrecord(), these routines check 378 * for the caller that there will be enough space to store the data. 379 * Each fails if there is not enough space, or if it cannot find mbufs 380 * to store additional information in. 381 * 382 * Reliable protocols may use the socket send buffer to hold data 383 * awaiting acknowledgement. Data is normally copied from a socket 384 * send buffer in a protocol with m_copy for output to a peer, 385 * and then removing the data from the socket buffer with sbdrop() 386 * or sbdroprecord() when the data is acknowledged by the peer. 387 */ 388 389 /* 390 * Append mbuf chain m to the last record in the 391 * socket buffer sb. The additional space associated 392 * the mbuf chain is recorded in sb. Empty mbufs are 393 * discarded and mbufs are compacted where possible. 394 */ 395 sbappend(sb, m) 396 struct sockbuf *sb; 397 struct mbuf *m; 398 { 399 register struct mbuf *n; 400 401 if (m == 0) 402 return; 403 if (n = sb->sb_mb) { 404 while (n->m_act) 405 n = n->m_act; 406 while (n->m_next) 407 n = n->m_next; 408 } 409 sbcompress(sb, m, n); 410 } 411 412 /* 413 * As above, except the mbuf chain 414 * begins a new record. 415 */ 416 sbappendrecord(sb, m0) 417 register struct sockbuf *sb; 418 register struct mbuf *m0; 419 { 420 register struct mbuf *m; 421 422 if (m0 == 0) 423 return; 424 if (m = sb->sb_mb) 425 while (m->m_act) 426 m = m->m_act; 427 /* 428 * Put the first mbuf on the queue. 429 * Note this permits zero length records. 
430 */ 431 sballoc(sb, m0); 432 if (m) 433 m->m_act = m0; 434 else 435 sb->sb_mb = m0; 436 m = m0->m_next; 437 m0->m_next = 0; 438 sbcompress(sb, m, m0); 439 } 440 441 /* 442 * Append address and data, and optionally, rights 443 * to the receive queue of a socket. Return 0 if 444 * no space in sockbuf or insufficient mbufs. 445 */ 446 sbappendaddr(sb, asa, m0, rights0) 447 register struct sockbuf *sb; 448 struct sockaddr *asa; 449 struct mbuf *rights0, *m0; 450 { 451 register struct mbuf *m, *n; 452 int space = sizeof (*asa); 453 454 for (m = m0; m; m = m->m_next) 455 space += m->m_len; 456 if (rights0) 457 space += rights0->m_len; 458 if (space > sbspace(sb)) 459 return (0); 460 MGET(m, M_DONTWAIT, MT_SONAME); 461 if (m == 0) 462 return (0); 463 *mtod(m, struct sockaddr *) = *asa; 464 m->m_len = sizeof (*asa); 465 if (rights0 && rights0->m_len) { 466 m->m_next = m_copy(rights0, 0, rights0->m_len); 467 if (m->m_next == 0) { 468 m_freem(m); 469 return (0); 470 } 471 sballoc(sb, m->m_next); 472 } 473 sballoc(sb, m); 474 if (n = sb->sb_mb) { 475 while (n->m_act) 476 n = n->m_act; 477 n->m_act = m; 478 } else 479 sb->sb_mb = m; 480 if (m->m_next) 481 m = m->m_next; 482 if (m0) 483 sbcompress(sb, m0, m); 484 return (1); 485 } 486 487 sbappendrights(sb, m0, rights) 488 struct sockbuf *sb; 489 struct mbuf *rights, *m0; 490 { 491 register struct mbuf *m, *n; 492 int space = 0; 493 494 if (rights == 0) 495 panic("sbappendrights"); 496 for (m = m0; m; m = m->m_next) 497 space += m->m_len; 498 space += rights->m_len; 499 if (space > sbspace(sb)) 500 return (0); 501 m = m_copy(rights, 0, rights->m_len); 502 if (m == 0) 503 return (0); 504 sballoc(sb, m); 505 if (n = sb->sb_mb) { 506 while (n->m_act) 507 n = n->m_act; 508 n->m_act = m; 509 } else 510 sb->sb_mb = m; 511 if (m0) 512 sbcompress(sb, m0, m); 513 return (1); 514 } 515 516 /* 517 * Compress mbuf chain m into the socket 518 * buffer sb following mbuf n. If n 519 * is null, the buffer is presumed empty. 
520 */ 521 sbcompress(sb, m, n) 522 register struct sockbuf *sb; 523 register struct mbuf *m, *n; 524 { 525 526 while (m) { 527 if (m->m_len == 0) { 528 m = m_free(m); 529 continue; 530 } 531 if (n && n->m_off <= MMAXOFF && m->m_off <= MMAXOFF && 532 (n->m_off + n->m_len + m->m_len) <= MMAXOFF && 533 n->m_type == m->m_type) { 534 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, 535 (unsigned)m->m_len); 536 n->m_len += m->m_len; 537 sb->sb_cc += m->m_len; 538 m = m_free(m); 539 continue; 540 } 541 sballoc(sb, m); 542 if (n) 543 n->m_next = m; 544 else 545 sb->sb_mb = m; 546 n = m; 547 m = m->m_next; 548 n->m_next = 0; 549 } 550 } 551 552 /* 553 * Free all mbufs in a sockbuf. 554 * Check that all resources are reclaimed. 555 */ 556 sbflush(sb) 557 register struct sockbuf *sb; 558 { 559 560 if (sb->sb_flags & SB_LOCK) 561 panic("sbflush"); 562 while (sb->sb_mbcnt) 563 sbdrop(sb, (int)sb->sb_cc); 564 if (sb->sb_cc || sb->sb_mbcnt || sb->sb_mb) 565 panic("sbflush 2"); 566 } 567 568 /* 569 * Drop data from (the front of) a sockbuf. 570 */ 571 sbdrop(sb, len) 572 register struct sockbuf *sb; 573 register int len; 574 { 575 register struct mbuf *m, *mn; 576 struct mbuf *next; 577 578 next = (m = sb->sb_mb) ? m->m_act : 0; 579 while (len > 0) { 580 if (m == 0) { 581 if (next == 0) 582 panic("sbdrop"); 583 m = next; 584 next = m->m_act; 585 continue; 586 } 587 if (m->m_len > len) { 588 m->m_len -= len; 589 m->m_off += len; 590 sb->sb_cc -= len; 591 break; 592 } 593 len -= m->m_len; 594 sbfree(sb, m); 595 MFREE(m, mn); 596 m = mn; 597 } 598 while (m && m->m_len == 0) { 599 sbfree(sb, m); 600 MFREE(m, mn); 601 m = mn; 602 } 603 if (m) { 604 sb->sb_mb = m; 605 m->m_act = next; 606 } else 607 sb->sb_mb = next; 608 } 609 610 /* 611 * Drop a record off the front of a sockbuf 612 * and move the next record to the front. 
613 */ 614 sbdroprecord(sb) 615 register struct sockbuf *sb; 616 { 617 register struct mbuf *m, *mn; 618 619 m = sb->sb_mb; 620 if (m) { 621 sb->sb_mb = m->m_act; 622 do { 623 sbfree(sb, m); 624 MFREE(m, mn); 625 } while (m = mn); 626 } 627 } 628