1 /* 2 * Copyright (c) 1982 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 * 6 * @(#)uipc_socket2.c 6.15 (Berkeley) 02/23/86 7 */ 8 9 #include "param.h" 10 #include "systm.h" 11 #include "dir.h" 12 #include "user.h" 13 #include "proc.h" 14 #include "file.h" 15 #include "inode.h" 16 #include "buf.h" 17 #include "mbuf.h" 18 #include "protosw.h" 19 #include "socket.h" 20 #include "socketvar.h" 21 22 /* 23 * Primitive routines for operating on sockets and socket buffers 24 */ 25 26 /* 27 * Procedures to manipulate state flags of socket 28 * and do appropriate wakeups. Normal sequence from the 29 * active (originating) side is that soisconnecting() is 30 * called during processing of connect() call, 31 * resulting in an eventual call to soisconnected() if/when the 32 * connection is established. When the connection is torn down 33 * soisdisconnecting() is called during processing of disconnect() call, 34 * and soisdisconnected() is called when the connection to the peer 35 * is totally severed. The semantics of these routines are such that 36 * connectionless protocols can call soisconnected() and soisdisconnected() 37 * only, bypassing the in-progress calls when setting up a ``connection'' 38 * takes no time. 39 * 40 * From the passive side, a socket is created with 41 * two queues of sockets: so_q0 for connections in progress 42 * and so_q for connections already made and awaiting user acceptance. 43 * As a protocol is preparing incoming connections, it creates a socket 44 * structure queued on so_q0 by calling sonewconn(). When the connection 45 * is established, soisconnected() is called, and transfers the 46 * socket structure to so_q, making it available to accept(). 47 * 48 * If a socket is closed with sockets on either 49 * so_q0 or so_q, these sockets are dropped. 50 * 51 * If higher level protocols are implemented in 52 * the kernel, the wakeups done here will sometimes 53 * cause software-interrupt process scheduling. 54 */ 55 56 soisconnecting(so) 57 register struct socket *so; 58 { 59 60 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 61 so->so_state |= SS_ISCONNECTING; 62 wakeup((caddr_t)&so->so_timeo); 63 } 64 65 soisconnected(so) 66 register struct socket *so; 67 { 68 register struct socket *head = so->so_head; 69 70 if (head) { 71 if (soqremque(so, 0) == 0) 72 panic("soisconnected"); 73 soqinsque(head, so, 1); 74 sorwakeup(head); 75 wakeup((caddr_t)&head->so_timeo); 76 } 77 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 78 so->so_state |= SS_ISCONNECTED; 79 wakeup((caddr_t)&so->so_timeo); 80 sorwakeup(so); 81 sowwakeup(so); 82 } 83 84 soisdisconnecting(so) 85 register struct socket *so; 86 { 87 88 so->so_state &= ~SS_ISCONNECTING; 89 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); 90 wakeup((caddr_t)&so->so_timeo); 91 sowwakeup(so); 92 sorwakeup(so); 93 } 94 95 soisdisconnected(so) 96 register struct socket *so; 97 { 98 99 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 100 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 101 wakeup((caddr_t)&so->so_timeo); 102 sowwakeup(so); 103 sorwakeup(so); 104 } 105 106 /* 107 * When an attempt at a new connection is noted on a socket 108 * which accepts connections, sonewconn is called. If the 109 * connection is possible (subject to space constraints, etc.) 110 * then we allocate a new structure, propoerly linked into the 111 * data structure of the original socket, and return this. 112 */ 113 struct socket * 114 sonewconn(head) 115 register struct socket *head; 116 { 117 register struct socket *so; 118 register struct mbuf *m; 119 120 if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) 121 goto bad; 122 m = m_getclr(M_DONTWAIT, MT_SOCKET); 123 if (m == NULL) 124 goto bad; 125 so = mtod(m, struct socket *); 126 so->so_type = head->so_type; 127 so->so_options = head->so_options &~ SO_ACCEPTCONN; 128 so->so_linger = head->so_linger; 129 so->so_state = head->so_state | SS_NOFDREF; 130 so->so_proto = head->so_proto; 131 so->so_timeo = head->so_timeo; 132 so->so_pgrp = head->so_pgrp; 133 soqinsque(head, so, 0); 134 if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, 135 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) { 136 (void) soqremque(so, 0); 137 (void) m_free(m); 138 goto bad; 139 } 140 return (so); 141 bad: 142 return ((struct socket *)0); 143 } 144 145 soqinsque(head, so, q) 146 register struct socket *head, *so; 147 int q; 148 { 149 150 so->so_head = head; 151 if (q == 0) { 152 head->so_q0len++; 153 so->so_q0 = head->so_q0; 154 head->so_q0 = so; 155 } else { 156 head->so_qlen++; 157 so->so_q = head->so_q; 158 head->so_q = so; 159 } 160 } 161 162 soqremque(so, q) 163 register struct socket *so; 164 int q; 165 { 166 register struct socket *head, *prev, *next; 167 168 head = so->so_head; 169 prev = head; 170 for (;;) { 171 next = q ? prev->so_q : prev->so_q0; 172 if (next == so) 173 break; 174 if (next == head) 175 return (0); 176 prev = next; 177 } 178 if (q == 0) { 179 prev->so_q0 = next->so_q0; 180 head->so_q0len--; 181 } else { 182 prev->so_q = next->so_q; 183 head->so_qlen--; 184 } 185 next->so_q0 = next->so_q = 0; 186 next->so_head = 0; 187 return (1); 188 } 189 190 /* 191 * Socantsendmore indicates that no more data will be sent on the 192 * socket; it would normally be applied to a socket when the user 193 * informs the system that no more data is to be sent, by the protocol 194 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 195 * will be received, and will normally be applied to the socket by a 196 * protocol when it detects that the peer will send no more data. 197 * Data queued for reading in the socket may yet be read. 198 */ 199 200 socantsendmore(so) 201 struct socket *so; 202 { 203 204 so->so_state |= SS_CANTSENDMORE; 205 sowwakeup(so); 206 } 207 208 socantrcvmore(so) 209 struct socket *so; 210 { 211 212 so->so_state |= SS_CANTRCVMORE; 213 sorwakeup(so); 214 } 215 216 /* 217 * Socket select/wakeup routines. 218 */ 219 220 /* 221 * Queue a process for a select on a socket buffer. 222 */ 223 sbselqueue(sb) 224 struct sockbuf *sb; 225 { 226 register struct proc *p; 227 228 if ((p = sb->sb_sel) && p->p_wchan == (caddr_t)&selwait) 229 sb->sb_flags |= SB_COLL; 230 else 231 sb->sb_sel = u.u_procp; 232 } 233 234 /* 235 * Wait for data to arrive at/drain from a socket buffer. 236 */ 237 sbwait(sb) 238 struct sockbuf *sb; 239 { 240 241 sb->sb_flags |= SB_WAIT; 242 sleep((caddr_t)&sb->sb_cc, PZERO+1); 243 } 244 245 /* 246 * Wakeup processes waiting on a socket buffer. 247 */ 248 sbwakeup(sb) 249 register struct sockbuf *sb; 250 { 251 252 if (sb->sb_sel) { 253 selwakeup(sb->sb_sel, sb->sb_flags & SB_COLL); 254 sb->sb_sel = 0; 255 sb->sb_flags &= ~SB_COLL; 256 } 257 if (sb->sb_flags & SB_WAIT) { 258 sb->sb_flags &= ~SB_WAIT; 259 wakeup((caddr_t)&sb->sb_cc); 260 } 261 } 262 263 /* 264 * Wakeup socket readers and writers. 265 * Do asynchronous notification via SIGIO 266 * if the socket has the SS_ASYNC flag set. 267 */ 268 sowakeup(so, sb) 269 register struct socket *so; 270 struct sockbuf *sb; 271 { 272 register struct proc *p; 273 274 sbwakeup(sb); 275 if (so->so_state & SS_ASYNC) { 276 if (so->so_pgrp < 0) 277 gsignal(-so->so_pgrp, SIGIO); 278 else if (so->so_pgrp > 0 && (p = pfind(so->so_pgrp)) != 0) 279 psignal(p, SIGIO); 280 } 281 } 282 283 /* 284 * Socket buffer (struct sockbuf) utility routines. 285 * 286 * Each socket contains two socket buffers: one for sending data and 287 * one for receiving data. Each buffer contains a queue of mbufs, 288 * information about the number of mbufs and amount of data in the 289 * queue, and other fields allowing select() statements and notification 290 * on data availability to be implemented. 291 * 292 * Data stored in a socket buffer is maintained as a list of records. 293 * Each record is a list of mbufs chained together with the m_next 294 * field. Records are chained together with the m_act field. The upper 295 * level routine soreceive() expects the following conventions to be 296 * observed when placing information in the receive buffer: 297 * 298 * 1. If the protocol requires each message be preceded by the sender's 299 * name, then a record containing that name must be present before 300 * any associated data (mbuf's must be of type MT_SONAME). 301 * 2. If the protocol supports the exchange of ``access rights'' (really 302 * just additional data associated with the message), and there are 303 * ``rights'' to be received, then a record containing this data 304 * should be present (mbuf's must be of type MT_RIGHTS). 305 * 3. If a name or rights record exists, then it must be followed by 306 * a data record, perhaps of zero length. 307 * 308 * Before using a new socket structure it is first necessary to reserve 309 * buffer space to the socket, by calling sbreserve(). This commits 310 * some of the available buffer space in the system buffer pool for the 311 * socket. The space should be released by calling sbrelease() when the 312 * socket is destroyed. 313 */ 314 315 soreserve(so, sndcc, rcvcc) 316 register struct socket *so; 317 int sndcc, rcvcc; 318 { 319 320 if (sbreserve(&so->so_snd, sndcc) == 0) 321 goto bad; 322 if (sbreserve(&so->so_rcv, rcvcc) == 0) 323 goto bad2; 324 return (0); 325 bad2: 326 sbrelease(&so->so_snd); 327 bad: 328 return (ENOBUFS); 329 } 330 331 /* 332 * Allot mbufs to a sockbuf. 333 */ 334 sbreserve(sb, cc) 335 struct sockbuf *sb; 336 { 337 338 if ((unsigned) cc > SB_MAX) 339 return (0); 340 /* someday maybe this routine will fail... */ 341 sb->sb_hiwat = cc; 342 /* * 2 implies names can be no more than 1 mbuf each */ 343 sb->sb_mbmax = MIN(cc * 2, SB_MAX); 344 return (1); 345 } 346 347 /* 348 * Free mbufs held by a socket, and reserved mbuf space. 349 */ 350 sbrelease(sb) 351 struct sockbuf *sb; 352 { 353 354 sbflush(sb); 355 sb->sb_hiwat = sb->sb_mbmax = 0; 356 } 357 358 /* 359 * Routines to add and remove 360 * data from an mbuf queue. 361 * 362 * The routines sbappend() or sbappendrecord() are normally called to 363 * append new mbufs to a socket buffer, after checking that adequate 364 * space is available, comparing the function sbspace() with the amount 365 * of data to be added. sbappendrecord() differs from sbappend() in 366 * that data supplied is treated as the beginning of a new record. 367 * To place a sender's address, optional access rights, and data in a 368 * socket receive buffer, sbappendaddr() should be used. To place 369 * access rights and data in a socket receive buffer, sbappendrights() 370 * should be used. In either case, the new data begins a new record. 371 * Note that unlike sbappend() and sbappendrecord(), these routines check 372 * for the caller that there will be enough space to store the data. 373 * Each fails if there is not enough space, or if it cannot find mbufs 374 * to store additional information in. 375 * 376 * Reliable protocols may use the socket send buffer to hold data 377 * awaiting acknowledgement. Data is normally copied from a socket 378 * send buffer in a protocol with m_copy for output to a peer, 379 * and then removing the data from the socket buffer with sbdrop() 380 * or sbdroprecord() when the data is acknowledged by the peer. 381 */ 382 383 /* 384 * Append mbuf chain m to the last record in the 385 * socket buffer sb. The additional space associated 386 * the mbuf chain is recorded in sb. Empty mbufs are 387 * discarded and mbufs are compacted where possible. 388 */ 389 sbappend(sb, m) 390 struct sockbuf *sb; 391 struct mbuf *m; 392 { 393 register struct mbuf *n; 394 395 if (m == 0) 396 return; 397 if (n = sb->sb_mb) { 398 while (n->m_act) 399 n = n->m_act; 400 while (n->m_next) 401 n = n->m_next; 402 } 403 sbcompress(sb, m, n); 404 } 405 406 /* 407 * As above, except the mbuf chain 408 * begins a new record. 409 */ 410 sbappendrecord(sb, m0) 411 register struct sockbuf *sb; 412 register struct mbuf *m0; 413 { 414 register struct mbuf *m; 415 416 if (m0 == 0) 417 return; 418 if (m = sb->sb_mb) 419 while (m->m_act) 420 m = m->m_act; 421 /* 422 * Put the first mbuf on the queue. 423 * Note this permits zero length records. 424 */ 425 sballoc(sb, m0); 426 if (m) 427 m->m_act = m0; 428 else 429 sb->sb_mb = m0; 430 m = m0->m_next; 431 m0->m_next = 0; 432 sbcompress(sb, m, m0); 433 } 434 435 /* 436 * Append address and data, and optionally, rights 437 * to the receive queue of a socket. Return 0 if 438 * no space in sockbuf or insufficient mbufs. 439 */ 440 sbappendaddr(sb, asa, m0, rights0) 441 register struct sockbuf *sb; 442 struct sockaddr *asa; 443 struct mbuf *rights0, *m0; 444 { 445 register struct mbuf *m, *n; 446 int space = sizeof (*asa); 447 448 for (m = m0; m; m = m->m_next) 449 space += m->m_len; 450 if (rights0) 451 space += rights0->m_len; 452 if (space > sbspace(sb)) 453 return (0); 454 MGET(m, M_DONTWAIT, MT_SONAME); 455 if (m == 0) 456 return (0); 457 *mtod(m, struct sockaddr *) = *asa; 458 m->m_len = sizeof (*asa); 459 if (rights0 && rights0->m_len) { 460 m->m_next = m_copy(rights0, 0, rights0->m_len); 461 if (m->m_next == 0) { 462 m_freem(m); 463 return (0); 464 } 465 sballoc(sb, m->m_next); 466 } 467 sballoc(sb, m); 468 if (n = sb->sb_mb) { 469 while (n->m_act) 470 n = n->m_act; 471 n->m_act = m; 472 } else 473 sb->sb_mb = m; 474 if (m->m_next) 475 m = m->m_next; 476 if (m0) 477 sbcompress(sb, m0, m); 478 return (1); 479 } 480 481 sbappendrights(sb, m0, rights) 482 struct sockbuf *sb; 483 struct mbuf *rights, *m0; 484 { 485 register struct mbuf *m, *n; 486 int space = 0; 487 488 if (rights == 0) 489 panic("sbappendrights"); 490 for (m = m0; m; m = m->m_next) 491 space += m->m_len; 492 space += rights->m_len; 493 if (space > sbspace(sb)) 494 return (0); 495 m = m_copy(rights, 0, rights->m_len); 496 if (m == 0) 497 return (0); 498 sballoc(sb, m); 499 if (n = sb->sb_mb) { 500 while (n->m_act) 501 n = n->m_act; 502 n->m_act = m; 503 } else 504 sb->sb_mb = m; 505 if (m0) 506 sbcompress(sb, m0, m); 507 return (1); 508 } 509 510 /* 511 * Compress mbuf chain m into the socket 512 * buffer sb following mbuf n. If n 513 * is null, the buffer is presumed empty. 514 */ 515 sbcompress(sb, m, n) 516 register struct sockbuf *sb; 517 register struct mbuf *m, *n; 518 { 519 520 while (m) { 521 if (m->m_len == 0) { 522 m = m_free(m); 523 continue; 524 } 525 if (n && n->m_off <= MMAXOFF && m->m_off <= MMAXOFF && 526 (n->m_off + n->m_len + m->m_len) <= MMAXOFF && 527 n->m_type == m->m_type) { 528 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, 529 (unsigned)m->m_len); 530 n->m_len += m->m_len; 531 sb->sb_cc += m->m_len; 532 m = m_free(m); 533 continue; 534 } 535 sballoc(sb, m); 536 if (n) 537 n->m_next = m; 538 else 539 sb->sb_mb = m; 540 n = m; 541 m = m->m_next; 542 n->m_next = 0; 543 } 544 } 545 546 /* 547 * Free all mbufs in a sockbuf. 548 * Check that all resources are reclaimed. 549 */ 550 sbflush(sb) 551 register struct sockbuf *sb; 552 { 553 554 if (sb->sb_flags & SB_LOCK) 555 panic("sbflush"); 556 while (sb->sb_mbcnt) 557 sbdrop(sb, (int)sb->sb_cc); 558 if (sb->sb_cc || sb->sb_mbcnt || sb->sb_mb) 559 panic("sbflush 2"); 560 } 561 562 /* 563 * Drop data from (the front of) a sockbuf. 564 */ 565 sbdrop(sb, len) 566 register struct sockbuf *sb; 567 register int len; 568 { 569 register struct mbuf *m, *mn; 570 struct mbuf *next; 571 572 next = (m = sb->sb_mb) ? m->m_act : 0; 573 while (len > 0) { 574 if (m == 0) { 575 if (next == 0) 576 panic("sbdrop"); 577 m = next; 578 next = m->m_act; 579 continue; 580 } 581 if (m->m_len > len) { 582 m->m_len -= len; 583 m->m_off += len; 584 sb->sb_cc -= len; 585 break; 586 } 587 len -= m->m_len; 588 sbfree(sb, m); 589 MFREE(m, mn); 590 m = mn; 591 } 592 while (m && m->m_len == 0) { 593 sbfree(sb, m); 594 MFREE(m, mn); 595 m = mn; 596 } 597 if (m) { 598 sb->sb_mb = m; 599 m->m_act = next; 600 } else 601 sb->sb_mb = next; 602 } 603 604 /* 605 * Drop a record off the front of a sockbuf 606 * and move the next record to the front. 607 */ 608 sbdroprecord(sb) 609 register struct sockbuf *sb; 610 { 611 register struct mbuf *m, *mn; 612 613 m = sb->sb_mb; 614 if (m) { 615 sb->sb_mb = m->m_act; 616 do { 617 sbfree(sb, m); 618 MFREE(m, mn); 619 } while (m = mn); 620 } 621 } 622