1 /* 2 * Copyright (c) 1982, 1986 Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are permitted 6 * provided that this notice is preserved and that due credit is given 7 * to the University of California at Berkeley. The name of the University 8 * may not be used to endorse or promote products derived from this 9 * software without specific prior written permission. This software 10 * is provided ``as is'' without express or implied warranty. 11 * 12 * @(#)uipc_socket2.c 7.4 (Berkeley) 05/26/88 13 */ 14 15 #include "param.h" 16 #include "systm.h" 17 #include "dir.h" 18 #include "user.h" 19 #include "proc.h" 20 #include "file.h" 21 #include "inode.h" 22 #include "buf.h" 23 #include "mbuf.h" 24 #include "protosw.h" 25 #include "socket.h" 26 #include "socketvar.h" 27 28 /* 29 * Primitive routines for operating on sockets and socket buffers 30 */ 31 32 /* 33 * Procedures to manipulate state flags of socket 34 * and do appropriate wakeups. Normal sequence from the 35 * active (originating) side is that soisconnecting() is 36 * called during processing of connect() call, 37 * resulting in an eventual call to soisconnected() if/when the 38 * connection is established. When the connection is torn down 39 * soisdisconnecting() is called during processing of disconnect() call, 40 * and soisdisconnected() is called when the connection to the peer 41 * is totally severed. The semantics of these routines are such that 42 * connectionless protocols can call soisconnected() and soisdisconnected() 43 * only, bypassing the in-progress calls when setting up a ``connection'' 44 * takes no time. 45 * 46 * From the passive side, a socket is created with 47 * two queues of sockets: so_q0 for connections in progress 48 * and so_q for connections already made and awaiting user acceptance. 49 * As a protocol is preparing incoming connections, it creates a socket 50 * structure queued on so_q0 by calling sonewconn(). When the connection 51 * is established, soisconnected() is called, and transfers the 52 * socket structure to so_q, making it available to accept(). 53 * 54 * If a socket is closed with sockets on either 55 * so_q0 or so_q, these sockets are dropped. 56 * 57 * If higher level protocols are implemented in 58 * the kernel, the wakeups done here will sometimes 59 * cause software-interrupt process scheduling. 60 */ 61 62 soisconnecting(so) 63 register struct socket *so; 64 { 65 66 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 67 so->so_state |= SS_ISCONNECTING; 68 wakeup((caddr_t)&so->so_timeo); 69 } 70 71 soisconnected(so) 72 register struct socket *so; 73 { 74 register struct socket *head = so->so_head; 75 76 if (head) { 77 if (soqremque(so, 0) == 0) 78 panic("soisconnected"); 79 soqinsque(head, so, 1); 80 sorwakeup(head); 81 wakeup((caddr_t)&head->so_timeo); 82 } 83 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 84 so->so_state |= SS_ISCONNECTED; 85 wakeup((caddr_t)&so->so_timeo); 86 sorwakeup(so); 87 sowwakeup(so); 88 } 89 90 soisdisconnecting(so) 91 register struct socket *so; 92 { 93 94 so->so_state &= ~SS_ISCONNECTING; 95 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); 96 wakeup((caddr_t)&so->so_timeo); 97 sowwakeup(so); 98 sorwakeup(so); 99 } 100 101 soisdisconnected(so) 102 register struct socket *so; 103 { 104 105 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 106 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 107 wakeup((caddr_t)&so->so_timeo); 108 sowwakeup(so); 109 sorwakeup(so); 110 } 111 112 /* 113 * When an attempt at a new connection is noted on a socket 114 * which accepts connections, sonewconn is called. If the 115 * connection is possible (subject to space constraints, etc.) 116 * then we allocate a new structure, propoerly linked into the 117 * data structure of the original socket, and return this. 118 */ 119 struct socket * 120 sonewconn(head) 121 register struct socket *head; 122 { 123 register struct socket *so; 124 register struct mbuf *m; 125 126 if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) 127 goto bad; 128 m = m_getclr(M_DONTWAIT, MT_SOCKET); 129 if (m == NULL) 130 goto bad; 131 so = mtod(m, struct socket *); 132 so->so_type = head->so_type; 133 so->so_options = head->so_options &~ SO_ACCEPTCONN; 134 so->so_linger = head->so_linger; 135 so->so_state = head->so_state | SS_NOFDREF; 136 so->so_proto = head->so_proto; 137 so->so_timeo = head->so_timeo; 138 so->so_pgrp = head->so_pgrp; 139 (void) soreserve(so, head->so_snd.sb_hiwat, head->so_snd.sb_hiwat); 140 soqinsque(head, so, 0); 141 if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, 142 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) { 143 (void) soqremque(so, 0); 144 (void) m_free(m); 145 goto bad; 146 } 147 return (so); 148 bad: 149 return ((struct socket *)0); 150 } 151 152 soqinsque(head, so, q) 153 register struct socket *head, *so; 154 int q; 155 { 156 157 so->so_head = head; 158 if (q == 0) { 159 head->so_q0len++; 160 so->so_q0 = head->so_q0; 161 head->so_q0 = so; 162 } else { 163 head->so_qlen++; 164 so->so_q = head->so_q; 165 head->so_q = so; 166 } 167 } 168 169 soqremque(so, q) 170 register struct socket *so; 171 int q; 172 { 173 register struct socket *head, *prev, *next; 174 175 head = so->so_head; 176 prev = head; 177 for (;;) { 178 next = q ? prev->so_q : prev->so_q0; 179 if (next == so) 180 break; 181 if (next == head) 182 return (0); 183 prev = next; 184 } 185 if (q == 0) { 186 prev->so_q0 = next->so_q0; 187 head->so_q0len--; 188 } else { 189 prev->so_q = next->so_q; 190 head->so_qlen--; 191 } 192 next->so_q0 = next->so_q = 0; 193 next->so_head = 0; 194 return (1); 195 } 196 197 /* 198 * Socantsendmore indicates that no more data will be sent on the 199 * socket; it would normally be applied to a socket when the user 200 * informs the system that no more data is to be sent, by the protocol 201 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 202 * will be received, and will normally be applied to the socket by a 203 * protocol when it detects that the peer will send no more data. 204 * Data queued for reading in the socket may yet be read. 205 */ 206 207 socantsendmore(so) 208 struct socket *so; 209 { 210 211 so->so_state |= SS_CANTSENDMORE; 212 sowwakeup(so); 213 } 214 215 socantrcvmore(so) 216 struct socket *so; 217 { 218 219 so->so_state |= SS_CANTRCVMORE; 220 sorwakeup(so); 221 } 222 223 /* 224 * Socket select/wakeup routines. 225 */ 226 227 /* 228 * Queue a process for a select on a socket buffer. 229 */ 230 sbselqueue(sb) 231 struct sockbuf *sb; 232 { 233 register struct proc *p; 234 235 if ((p = sb->sb_sel) && p->p_wchan == (caddr_t)&selwait) 236 sb->sb_flags |= SB_COLL; 237 else 238 sb->sb_sel = u.u_procp; 239 } 240 241 /* 242 * Wait for data to arrive at/drain from a socket buffer. 243 */ 244 sbwait(sb) 245 struct sockbuf *sb; 246 { 247 248 sb->sb_flags |= SB_WAIT; 249 sleep((caddr_t)&sb->sb_cc, PZERO+1); 250 } 251 252 /* 253 * Wakeup processes waiting on a socket buffer. 254 */ 255 sbwakeup(sb) 256 register struct sockbuf *sb; 257 { 258 259 if (sb->sb_sel) { 260 selwakeup(sb->sb_sel, sb->sb_flags & SB_COLL); 261 sb->sb_sel = 0; 262 sb->sb_flags &= ~SB_COLL; 263 } 264 if (sb->sb_flags & SB_WAIT) { 265 sb->sb_flags &= ~SB_WAIT; 266 wakeup((caddr_t)&sb->sb_cc); 267 } 268 } 269 270 /* 271 * Wakeup socket readers and writers. 272 * Do asynchronous notification via SIGIO 273 * if the socket has the SS_ASYNC flag set. 274 */ 275 sowakeup(so, sb) 276 register struct socket *so; 277 struct sockbuf *sb; 278 { 279 register struct proc *p; 280 281 sbwakeup(sb); 282 if (so->so_state & SS_ASYNC) { 283 if (so->so_pgrp < 0) 284 gsignal(-so->so_pgrp, SIGIO); 285 else if (so->so_pgrp > 0 && (p = pfind(so->so_pgrp)) != 0) 286 psignal(p, SIGIO); 287 } 288 } 289 290 /* 291 * Socket buffer (struct sockbuf) utility routines. 292 * 293 * Each socket contains two socket buffers: one for sending data and 294 * one for receiving data. Each buffer contains a queue of mbufs, 295 * information about the number of mbufs and amount of data in the 296 * queue, and other fields allowing select() statements and notification 297 * on data availability to be implemented. 298 * 299 * Data stored in a socket buffer is maintained as a list of records. 300 * Each record is a list of mbufs chained together with the m_next 301 * field. Records are chained together with the m_act field. The upper 302 * level routine soreceive() expects the following conventions to be 303 * observed when placing information in the receive buffer: 304 * 305 * 1. If the protocol requires each message be preceded by the sender's 306 * name, then a record containing that name must be present before 307 * any associated data (mbuf's must be of type MT_SONAME). 308 * 2. If the protocol supports the exchange of ``access rights'' (really 309 * just additional data associated with the message), and there are 310 * ``rights'' to be received, then a record containing this data 311 * should be present (mbuf's must be of type MT_RIGHTS). 312 * 3. If a name or rights record exists, then it must be followed by 313 * a data record, perhaps of zero length. 314 * 315 * Before using a new socket structure it is first necessary to reserve 316 * buffer space to the socket, by calling sbreserve(). This should commit 317 * some of the available buffer space in the system buffer pool for the 318 * socket (currently, it does nothing but enforce limits). The space 319 * should be released by calling sbrelease() when the socket is destroyed. 320 */ 321 322 soreserve(so, sndcc, rcvcc) 323 register struct socket *so; 324 u_long sndcc, rcvcc; 325 { 326 327 if (sbreserve(&so->so_snd, sndcc) == 0) 328 goto bad; 329 if (sbreserve(&so->so_rcv, rcvcc) == 0) 330 goto bad2; 331 return (0); 332 bad2: 333 sbrelease(&so->so_snd); 334 bad: 335 return (ENOBUFS); 336 } 337 338 /* 339 * Allot mbufs to a sockbuf. 340 * Attempt to scale cc so that mbcnt doesn't become limiting 341 * if buffering efficiency is near the normal case. 342 */ 343 sbreserve(sb, cc) 344 struct sockbuf *sb; 345 u_long cc; 346 { 347 348 if (cc > (u_long)SB_MAX * CLBYTES / (2 * MSIZE + CLBYTES)) 349 return (0); 350 sb->sb_hiwat = cc; 351 sb->sb_mbmax = MIN(cc * 2, SB_MAX); 352 return (1); 353 } 354 355 /* 356 * Free mbufs held by a socket, and reserved mbuf space. 357 */ 358 sbrelease(sb) 359 struct sockbuf *sb; 360 { 361 362 sbflush(sb); 363 sb->sb_hiwat = sb->sb_mbmax = 0; 364 } 365 366 /* 367 * Routines to add and remove 368 * data from an mbuf queue. 369 * 370 * The routines sbappend() or sbappendrecord() are normally called to 371 * append new mbufs to a socket buffer, after checking that adequate 372 * space is available, comparing the function sbspace() with the amount 373 * of data to be added. sbappendrecord() differs from sbappend() in 374 * that data supplied is treated as the beginning of a new record. 375 * To place a sender's address, optional access rights, and data in a 376 * socket receive buffer, sbappendaddr() should be used. To place 377 * access rights and data in a socket receive buffer, sbappendrights() 378 * should be used. In either case, the new data begins a new record. 379 * Note that unlike sbappend() and sbappendrecord(), these routines check 380 * for the caller that there will be enough space to store the data. 381 * Each fails if there is not enough space, or if it cannot find mbufs 382 * to store additional information in. 383 * 384 * Reliable protocols may use the socket send buffer to hold data 385 * awaiting acknowledgement. Data is normally copied from a socket 386 * send buffer in a protocol with m_copy for output to a peer, 387 * and then removing the data from the socket buffer with sbdrop() 388 * or sbdroprecord() when the data is acknowledged by the peer. 389 */ 390 391 /* 392 * Append mbuf chain m to the last record in the 393 * socket buffer sb. The additional space associated 394 * the mbuf chain is recorded in sb. Empty mbufs are 395 * discarded and mbufs are compacted where possible. 396 */ 397 sbappend(sb, m) 398 struct sockbuf *sb; 399 struct mbuf *m; 400 { 401 register struct mbuf *n; 402 403 if (m == 0) 404 return; 405 if (n = sb->sb_mb) { 406 while (n->m_act) 407 n = n->m_act; 408 while (n->m_next) 409 n = n->m_next; 410 } 411 sbcompress(sb, m, n); 412 } 413 414 /* 415 * As above, except the mbuf chain 416 * begins a new record. 417 */ 418 sbappendrecord(sb, m0) 419 register struct sockbuf *sb; 420 register struct mbuf *m0; 421 { 422 register struct mbuf *m; 423 424 if (m0 == 0) 425 return; 426 if (m = sb->sb_mb) 427 while (m->m_act) 428 m = m->m_act; 429 /* 430 * Put the first mbuf on the queue. 431 * Note this permits zero length records. 432 */ 433 sballoc(sb, m0); 434 if (m) 435 m->m_act = m0; 436 else 437 sb->sb_mb = m0; 438 m = m0->m_next; 439 m0->m_next = 0; 440 sbcompress(sb, m, m0); 441 } 442 443 /* 444 * Append address and data, and optionally, rights 445 * to the receive queue of a socket. Return 0 if 446 * no space in sockbuf or insufficient mbufs. 447 */ 448 sbappendaddr(sb, asa, m0, rights0) 449 register struct sockbuf *sb; 450 struct sockaddr *asa; 451 struct mbuf *rights0, *m0; 452 { 453 register struct mbuf *m, *n; 454 int space = sizeof (*asa); 455 456 for (m = m0; m; m = m->m_next) 457 space += m->m_len; 458 if (rights0) 459 space += rights0->m_len; 460 if (space > sbspace(sb)) 461 return (0); 462 MGET(m, M_DONTWAIT, MT_SONAME); 463 if (m == 0) 464 return (0); 465 *mtod(m, struct sockaddr *) = *asa; 466 m->m_len = sizeof (*asa); 467 if (rights0 && rights0->m_len) { 468 m->m_next = m_copy(rights0, 0, rights0->m_len); 469 if (m->m_next == 0) { 470 m_freem(m); 471 return (0); 472 } 473 sballoc(sb, m->m_next); 474 } 475 sballoc(sb, m); 476 if (n = sb->sb_mb) { 477 while (n->m_act) 478 n = n->m_act; 479 n->m_act = m; 480 } else 481 sb->sb_mb = m; 482 if (m->m_next) 483 m = m->m_next; 484 if (m0) 485 sbcompress(sb, m0, m); 486 return (1); 487 } 488 489 sbappendrights(sb, m0, rights) 490 struct sockbuf *sb; 491 struct mbuf *rights, *m0; 492 { 493 register struct mbuf *m, *n; 494 int space = 0; 495 496 if (rights == 0) 497 panic("sbappendrights"); 498 for (m = m0; m; m = m->m_next) 499 space += m->m_len; 500 space += rights->m_len; 501 if (space > sbspace(sb)) 502 return (0); 503 m = m_copy(rights, 0, rights->m_len); 504 if (m == 0) 505 return (0); 506 sballoc(sb, m); 507 if (n = sb->sb_mb) { 508 while (n->m_act) 509 n = n->m_act; 510 n->m_act = m; 511 } else 512 sb->sb_mb = m; 513 if (m0) 514 sbcompress(sb, m0, m); 515 return (1); 516 } 517 518 /* 519 * Compress mbuf chain m into the socket 520 * buffer sb following mbuf n. If n 521 * is null, the buffer is presumed empty. 522 */ 523 sbcompress(sb, m, n) 524 register struct sockbuf *sb; 525 register struct mbuf *m, *n; 526 { 527 528 while (m) { 529 if (m->m_len == 0) { 530 m = m_free(m); 531 continue; 532 } 533 if (n && n->m_off <= MMAXOFF && m->m_off <= MMAXOFF && 534 (n->m_off + n->m_len + m->m_len) <= MMAXOFF && 535 n->m_type == m->m_type) { 536 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, 537 (unsigned)m->m_len); 538 n->m_len += m->m_len; 539 sb->sb_cc += m->m_len; 540 m = m_free(m); 541 continue; 542 } 543 sballoc(sb, m); 544 if (n) 545 n->m_next = m; 546 else 547 sb->sb_mb = m; 548 n = m; 549 m = m->m_next; 550 n->m_next = 0; 551 } 552 } 553 554 /* 555 * Free all mbufs in a sockbuf. 556 * Check that all resources are reclaimed. 557 */ 558 sbflush(sb) 559 register struct sockbuf *sb; 560 { 561 562 if (sb->sb_flags & SB_LOCK) 563 panic("sbflush"); 564 while (sb->sb_mbcnt) 565 sbdrop(sb, (int)sb->sb_cc); 566 if (sb->sb_cc || sb->sb_mbcnt || sb->sb_mb) 567 panic("sbflush 2"); 568 } 569 570 /* 571 * Drop data from (the front of) a sockbuf. 572 */ 573 sbdrop(sb, len) 574 register struct sockbuf *sb; 575 register int len; 576 { 577 register struct mbuf *m, *mn; 578 struct mbuf *next; 579 580 next = (m = sb->sb_mb) ? m->m_act : 0; 581 while (len > 0) { 582 if (m == 0) { 583 if (next == 0) 584 panic("sbdrop"); 585 m = next; 586 next = m->m_act; 587 continue; 588 } 589 if (m->m_len > len) { 590 m->m_len -= len; 591 m->m_off += len; 592 sb->sb_cc -= len; 593 break; 594 } 595 len -= m->m_len; 596 sbfree(sb, m); 597 MFREE(m, mn); 598 m = mn; 599 } 600 while (m && m->m_len == 0) { 601 sbfree(sb, m); 602 MFREE(m, mn); 603 m = mn; 604 } 605 if (m) { 606 sb->sb_mb = m; 607 m->m_act = next; 608 } else 609 sb->sb_mb = next; 610 } 611 612 /* 613 * Drop a record off the front of a sockbuf 614 * and move the next record to the front. 615 */ 616 sbdroprecord(sb) 617 register struct sockbuf *sb; 618 { 619 register struct mbuf *m, *mn; 620 621 m = sb->sb_mb; 622 if (m) { 623 sb->sb_mb = m->m_act; 624 do { 625 sbfree(sb, m); 626 MFREE(m, mn); 627 } while (m = mn); 628 } 629 } 630