1 /* $NetBSD: uipc_usrreq.c,v 1.28 1997/10/17 17:35:08 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Christopher G. Demetriou. All rights reserved. 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 37 */ 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/proc.h> 42 #include <sys/filedesc.h> 43 #include <sys/domain.h> 44 #include <sys/protosw.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/unpcb.h> 48 #include <sys/un.h> 49 #include <sys/namei.h> 50 #include <sys/vnode.h> 51 #include <sys/file.h> 52 #include <sys/stat.h> 53 #include <sys/mbuf.h> 54 55 /* 56 * Unix communications domain. 57 * 58 * TODO: 59 * SEQPACKET, RDM 60 * rethink name space problems 61 * need a proper out-of-band 62 */ 63 struct sockaddr_un sun_noname = { sizeof(sun_noname), AF_UNIX }; 64 ino_t unp_ino; /* prototype for fake inode numbers */ 65 66 int 67 unp_output(m, control, unp) 68 struct mbuf *m, *control; 69 struct unpcb *unp; 70 { 71 struct socket *so2; 72 struct sockaddr_un *sun; 73 74 so2 = unp->unp_conn->unp_socket; 75 if (unp->unp_addr) 76 sun = unp->unp_addr; 77 else 78 sun = &sun_noname; 79 if (sbappendaddr(&so2->so_rcv, (struct sockaddr *)sun, m, 80 control) == 0) { 81 m_freem(control); 82 m_freem(m); 83 return (EINVAL); 84 } else { 85 sorwakeup(so2); 86 return (0); 87 } 88 } 89 90 void 91 unp_setsockaddr(unp, nam) 92 register struct unpcb *unp; 93 struct mbuf *nam; 94 { 95 struct sockaddr_un *sun; 96 97 if (unp->unp_addr) 98 sun = unp->unp_addr; 99 else 100 sun = &sun_noname; 101 nam->m_len = sun->sun_len; 102 if (nam->m_len > MLEN) 103 MEXTMALLOC(nam, nam->m_len, M_WAITOK); 104 bcopy(sun, mtod(nam, caddr_t), (size_t)nam->m_len); 105 } 106 107 void 108 unp_setpeeraddr(unp, nam) 109 register struct unpcb *unp; 110 struct mbuf *nam; 111 { 112 struct sockaddr_un *sun; 113 114 if (unp->unp_conn && unp->unp_conn->unp_addr) 115 sun = unp->unp_conn->unp_addr; 116 else 117 sun = &sun_noname; 118 nam->m_len = sun->sun_len; 119 if (nam->m_len > MLEN) 120 MEXTMALLOC(nam, nam->m_len, M_WAITOK); 121 bcopy(sun, mtod(nam, caddr_t), (size_t)nam->m_len); 122 } 123 124 /*ARGSUSED*/ 125 int 126 uipc_usrreq(so, req, m, nam, control, p) 127 struct socket *so; 128 int req; 129 struct mbuf *m, *nam, *control; 130 struct proc *p; 131 { 132 struct unpcb *unp = sotounpcb(so); 133 register struct socket *so2; 134 register int error = 0; 135 136 if (req == PRU_CONTROL) 137 return (EOPNOTSUPP); 138 139 #ifdef DIAGNOSTIC 140 if (req != PRU_SEND && req != PRU_SENDOOB && control) 141 panic("uipc_usrreq: unexpected control mbuf"); 142 #endif 143 if (unp == 0 && req != PRU_ATTACH) { 144 error = EINVAL; 145 goto release; 146 } 147 148 switch (req) { 149 150 case PRU_ATTACH: 151 if (unp != 0) { 152 error = EISCONN; 153 break; 154 } 155 error = unp_attach(so); 156 break; 157 158 case PRU_DETACH: 159 unp_detach(unp); 160 break; 161 162 case PRU_BIND: 163 error = unp_bind(unp, nam, p); 164 break; 165 166 case PRU_LISTEN: 167 if (unp->unp_vnode == 0) 168 error = EINVAL; 169 break; 170 171 case PRU_CONNECT: 172 error = unp_connect(so, nam, p); 173 break; 174 175 case PRU_CONNECT2: 176 error = unp_connect2(so, (struct socket *)nam); 177 break; 178 179 case PRU_DISCONNECT: 180 unp_disconnect(unp); 181 break; 182 183 case PRU_ACCEPT: 184 unp_setpeeraddr(unp, nam); 185 break; 186 187 case PRU_SHUTDOWN: 188 socantsendmore(so); 189 unp_shutdown(unp); 190 break; 191 192 case PRU_RCVD: 193 switch (so->so_type) { 194 195 case SOCK_DGRAM: 196 panic("uipc 1"); 197 /*NOTREACHED*/ 198 199 case SOCK_STREAM: 200 #define rcv (&so->so_rcv) 201 #define snd (&so2->so_snd) 202 if (unp->unp_conn == 0) 203 break; 204 so2 = unp->unp_conn->unp_socket; 205 /* 206 * Adjust backpressure on sender 207 * and wakeup any waiting to write. 208 */ 209 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; 210 unp->unp_mbcnt = rcv->sb_mbcnt; 211 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; 212 unp->unp_cc = rcv->sb_cc; 213 sowwakeup(so2); 214 #undef snd 215 #undef rcv 216 break; 217 218 default: 219 panic("uipc 2"); 220 } 221 break; 222 223 case PRU_SEND: 224 if (control && (error = unp_internalize(control, p))) 225 break; 226 switch (so->so_type) { 227 228 case SOCK_DGRAM: { 229 if (nam) { 230 if ((so->so_state & SS_ISCONNECTED) != 0) { 231 error = EISCONN; 232 goto die; 233 } 234 error = unp_connect(so, nam, p); 235 if (error) { 236 die: 237 m_freem(control); 238 m_freem(m); 239 break; 240 } 241 } else { 242 if ((so->so_state & SS_ISCONNECTED) == 0) { 243 error = ENOTCONN; 244 goto die; 245 } 246 } 247 error = unp_output(m, control, unp); 248 if (nam) 249 unp_disconnect(unp); 250 break; 251 } 252 253 case SOCK_STREAM: 254 #define rcv (&so2->so_rcv) 255 #define snd (&so->so_snd) 256 if (unp->unp_conn == 0) 257 panic("uipc 3"); 258 so2 = unp->unp_conn->unp_socket; 259 /* 260 * Send to paired receive port, and then reduce 261 * send buffer hiwater marks to maintain backpressure. 262 * Wake up readers. 263 */ 264 if (control) { 265 if (sbappendcontrol(rcv, m, control) == 0) 266 m_freem(control); 267 } else 268 sbappend(rcv, m); 269 snd->sb_mbmax -= 270 rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; 271 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; 272 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; 273 unp->unp_conn->unp_cc = rcv->sb_cc; 274 sorwakeup(so2); 275 #undef snd 276 #undef rcv 277 break; 278 279 default: 280 panic("uipc 4"); 281 } 282 break; 283 284 case PRU_ABORT: 285 unp_drop(unp, ECONNABORTED); 286 break; 287 288 case PRU_SENSE: 289 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 290 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { 291 so2 = unp->unp_conn->unp_socket; 292 ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc; 293 } 294 ((struct stat *) m)->st_dev = NODEV; 295 if (unp->unp_ino == 0) 296 unp->unp_ino = unp_ino++; 297 ((struct stat *) m)->st_atimespec = 298 ((struct stat *) m)->st_mtimespec = 299 ((struct stat *) m)->st_ctimespec = unp->unp_ctime; 300 ((struct stat *) m)->st_ino = unp->unp_ino; 301 return (0); 302 303 case PRU_RCVOOB: 304 error = EOPNOTSUPP; 305 break; 306 307 case PRU_SENDOOB: 308 m_freem(control); 309 m_freem(m); 310 error = EOPNOTSUPP; 311 break; 312 313 case PRU_SOCKADDR: 314 unp_setsockaddr(unp, nam); 315 break; 316 317 case PRU_PEERADDR: 318 unp_setpeeraddr(unp, nam); 319 break; 320 321 default: 322 panic("piusrreq"); 323 } 324 325 release: 326 return (error); 327 } 328 329 /* 330 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 331 * for stream sockets, although the total for sender and receiver is 332 * actually only PIPSIZ. 333 * Datagram sockets really use the sendspace as the maximum datagram size, 334 * and don't really want to reserve the sendspace. Their recvspace should 335 * be large enough for at least one max-size datagram plus address. 336 */ 337 #define PIPSIZ 4096 338 u_long unpst_sendspace = PIPSIZ; 339 u_long unpst_recvspace = PIPSIZ; 340 u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 341 u_long unpdg_recvspace = 4*1024; 342 343 int unp_rights; /* file descriptors in flight */ 344 345 int 346 unp_attach(so) 347 struct socket *so; 348 { 349 register struct unpcb *unp; 350 struct timeval tv; 351 int error; 352 353 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 354 switch (so->so_type) { 355 356 case SOCK_STREAM: 357 error = soreserve(so, unpst_sendspace, unpst_recvspace); 358 break; 359 360 case SOCK_DGRAM: 361 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 362 break; 363 364 default: 365 panic("unp_attach"); 366 } 367 if (error) 368 return (error); 369 } 370 unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT); 371 if (unp == NULL) 372 return (ENOBUFS); 373 bzero((caddr_t)unp, sizeof(*unp)); 374 unp->unp_socket = so; 375 so->so_pcb = unp; 376 microtime(&tv); 377 TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime); 378 return (0); 379 } 380 381 void 382 unp_detach(unp) 383 register struct unpcb *unp; 384 { 385 386 if (unp->unp_vnode) { 387 unp->unp_vnode->v_socket = 0; 388 vrele(unp->unp_vnode); 389 unp->unp_vnode = 0; 390 } 391 if (unp->unp_conn) 392 unp_disconnect(unp); 393 while (unp->unp_refs) 394 unp_drop(unp->unp_refs, ECONNRESET); 395 soisdisconnected(unp->unp_socket); 396 unp->unp_socket->so_pcb = 0; 397 if (unp->unp_addr) 398 free(unp->unp_addr, M_SONAME); 399 if (unp_rights) { 400 /* 401 * Normally the receive buffer is flushed later, 402 * in sofree, but if our receive buffer holds references 403 * to descriptors that are now garbage, we will dispose 404 * of those descriptor references after the garbage collector 405 * gets them (resulting in a "panic: closef: count < 0"). 406 */ 407 sorflush(unp->unp_socket); 408 free(unp, M_PCB); 409 unp_gc(); 410 } else 411 free(unp, M_PCB); 412 } 413 414 int 415 unp_bind(unp, nam, p) 416 struct unpcb *unp; 417 struct mbuf *nam; 418 struct proc *p; 419 { 420 struct sockaddr_un *sun; 421 register struct vnode *vp; 422 struct vattr vattr; 423 size_t addrlen; 424 int error; 425 struct nameidata nd; 426 427 if (unp->unp_vnode != 0) 428 return (EINVAL); 429 430 /* 431 * Allocate the new sockaddr. We have to allocate one 432 * extra byte so that we can ensure that the pathname 433 * is nul-terminated. 434 */ 435 addrlen = nam->m_len + 1; 436 sun = malloc(addrlen, M_SONAME, M_WAITOK); 437 m_copydata(nam, 0, nam->m_len, (caddr_t)sun); 438 *(((char *)sun) + nam->m_len) = '\0'; 439 440 NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE, 441 sun->sun_path, p); 442 443 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 444 if ((error = namei(&nd)) != 0) 445 goto bad; 446 vp = nd.ni_vp; 447 if (vp != NULL) { 448 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 449 if (nd.ni_dvp == vp) 450 vrele(nd.ni_dvp); 451 else 452 vput(nd.ni_dvp); 453 vrele(vp); 454 error = EADDRINUSE; 455 goto bad; 456 } 457 VATTR_NULL(&vattr); 458 vattr.va_type = VSOCK; 459 vattr.va_mode = ACCESSPERMS; 460 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); 461 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 462 if (error) 463 goto bad; 464 vp = nd.ni_vp; 465 vp->v_socket = unp->unp_socket; 466 unp->unp_vnode = vp; 467 unp->unp_addrlen = addrlen; 468 unp->unp_addr = sun; 469 VOP_UNLOCK(vp); 470 return (0); 471 472 bad: 473 free(sun, M_SONAME); 474 return (error); 475 } 476 477 int 478 unp_connect(so, nam, p) 479 struct socket *so; 480 struct mbuf *nam; 481 struct proc *p; 482 { 483 register struct sockaddr_un *sun; 484 register struct vnode *vp; 485 register struct socket *so2, *so3; 486 struct unpcb *unp2, *unp3; 487 size_t addrlen; 488 int error; 489 struct nameidata nd; 490 491 /* 492 * Allocate a temporary sockaddr. We have to allocate one extra 493 * byte so that we can ensure that the pathname is nul-terminated. 494 * When we establish the connection, we copy the other PCB's 495 * sockaddr to our own. 496 */ 497 addrlen = nam->m_len + 1; 498 sun = malloc(addrlen, M_SONAME, M_WAITOK); 499 m_copydata(nam, 0, nam->m_len, (caddr_t)sun); 500 *(((char *)sun) + nam->m_len) = '\0'; 501 502 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, sun->sun_path, p); 503 504 if ((error = namei(&nd)) != 0) 505 goto bad2; 506 vp = nd.ni_vp; 507 if (vp->v_type != VSOCK) { 508 error = ENOTSOCK; 509 goto bad; 510 } 511 if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0) 512 goto bad; 513 so2 = vp->v_socket; 514 if (so2 == 0) { 515 error = ECONNREFUSED; 516 goto bad; 517 } 518 if (so->so_type != so2->so_type) { 519 error = EPROTOTYPE; 520 goto bad; 521 } 522 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 523 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 524 (so3 = sonewconn(so2, 0)) == 0) { 525 error = ECONNREFUSED; 526 goto bad; 527 } 528 unp2 = sotounpcb(so2); 529 unp3 = sotounpcb(so3); 530 if (unp2->unp_addr) { 531 unp3->unp_addr = malloc(unp2->unp_addrlen, 532 M_SONAME, M_WAITOK); 533 bcopy(unp2->unp_addr, unp3->unp_addr, 534 unp2->unp_addrlen); 535 unp3->unp_addrlen = unp2->unp_addrlen; 536 } 537 so2 = so3; 538 } 539 error = unp_connect2(so, so2); 540 bad: 541 vput(vp); 542 bad2: 543 free(sun, M_SONAME); 544 return (error); 545 } 546 547 int 548 unp_connect2(so, so2) 549 register struct socket *so; 550 register struct socket *so2; 551 { 552 register struct unpcb *unp = sotounpcb(so); 553 register struct unpcb *unp2; 554 555 if (so2->so_type != so->so_type) 556 return (EPROTOTYPE); 557 unp2 = sotounpcb(so2); 558 unp->unp_conn = unp2; 559 switch (so->so_type) { 560 561 case SOCK_DGRAM: 562 unp->unp_nextref = unp2->unp_refs; 563 unp2->unp_refs = unp; 564 soisconnected(so); 565 break; 566 567 case SOCK_STREAM: 568 unp2->unp_conn = unp; 569 soisconnected(so); 570 soisconnected(so2); 571 break; 572 573 default: 574 panic("unp_connect2"); 575 } 576 return (0); 577 } 578 579 void 580 unp_disconnect(unp) 581 struct unpcb *unp; 582 { 583 register struct unpcb *unp2 = unp->unp_conn; 584 585 if (unp2 == 0) 586 return; 587 unp->unp_conn = 0; 588 switch (unp->unp_socket->so_type) { 589 590 case SOCK_DGRAM: 591 if (unp2->unp_refs == unp) 592 unp2->unp_refs = unp->unp_nextref; 593 else { 594 unp2 = unp2->unp_refs; 595 for (;;) { 596 if (unp2 == 0) 597 panic("unp_disconnect"); 598 if (unp2->unp_nextref == unp) 599 break; 600 unp2 = unp2->unp_nextref; 601 } 602 unp2->unp_nextref = unp->unp_nextref; 603 } 604 unp->unp_nextref = 0; 605 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 606 break; 607 608 case SOCK_STREAM: 609 soisdisconnected(unp->unp_socket); 610 unp2->unp_conn = 0; 611 soisdisconnected(unp2->unp_socket); 612 break; 613 } 614 } 615 616 #ifdef notdef 617 unp_abort(unp) 618 struct unpcb *unp; 619 { 620 621 unp_detach(unp); 622 } 623 #endif 624 625 void 626 unp_shutdown(unp) 627 struct unpcb *unp; 628 { 629 struct socket *so; 630 631 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 632 (so = unp->unp_conn->unp_socket)) 633 socantrcvmore(so); 634 } 635 636 void 637 unp_drop(unp, errno) 638 struct unpcb *unp; 639 int errno; 640 { 641 struct socket *so = unp->unp_socket; 642 643 so->so_error = errno; 644 unp_disconnect(unp); 645 if (so->so_head) { 646 so->so_pcb = 0; 647 sofree(so); 648 if (unp->unp_addr) 649 free(unp->unp_addr, M_SONAME); 650 free(unp, M_PCB); 651 } 652 } 653 654 #ifdef notdef 655 unp_drain() 656 { 657 658 } 659 #endif 660 661 int 662 unp_externalize(rights) 663 struct mbuf *rights; 664 { 665 struct proc *p = curproc; /* XXX */ 666 register struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 667 register int i, *fdp = (int *)(cm + 1); 668 register struct file **rp = (struct file **)ALIGN(cm + 1); 669 register struct file *fp; 670 int nfds = (cm->cmsg_len - ALIGN(sizeof(*cm))) / sizeof (struct file *); 671 int f; 672 673 /* Make sure that the recipient has space */ 674 if (!fdavail(p, nfds)) { 675 for (i = 0; i < nfds; i++) { 676 fp = *rp; 677 unp_discard(fp); 678 *rp++ = 0; 679 } 680 return (EMSGSIZE); 681 } 682 683 /* 684 * Add file to the recipient's open file table, converting them 685 * to integer file descriptors as we go. Done in forward order 686 * because an integer will always come in the same place or before 687 * its corresponding struct file pointer. 688 */ 689 for (i = 0; i < nfds; i++) { 690 if (fdalloc(p, 0, &f)) 691 panic("unp_externalize"); 692 fp = *rp; 693 p->p_fd->fd_ofiles[f] = fp; 694 fp->f_msgcount--; 695 unp_rights--; 696 *fdp++ = f; 697 } 698 699 /* 700 * Adjust length, in case of transition from large struct file 701 * pointers to ints. 702 */ 703 cm->cmsg_len = sizeof(*cm) + (nfds * sizeof(int)); 704 rights->m_len = cm->cmsg_len; 705 return (0); 706 } 707 708 int 709 unp_internalize(control, p) 710 struct mbuf *control; 711 struct proc *p; 712 { 713 struct filedesc *fdescp = p->p_fd; 714 register struct cmsghdr *cm = mtod(control, struct cmsghdr *); 715 register struct file **rp; 716 register struct file *fp; 717 register int i, fd, *fdp; 718 int nfds; 719 u_int neededspace; 720 721 /* Sanity check the control message header */ 722 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || 723 cm->cmsg_len != control->m_len) 724 return (EINVAL); 725 726 /* Verify that the file descriptors are valid */ 727 nfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); 728 fdp = (int *)(cm + 1); 729 for (i = 0; i < nfds; i++) { 730 fd = *fdp++; 731 if ((unsigned)fd >= fdescp->fd_nfiles || 732 fdescp->fd_ofiles[fd] == NULL) 733 return (EBADF); 734 } 735 736 /* Make sure we have room for the struct file pointers */ 737 morespace: 738 neededspace = (ALIGN(sizeof (*cm)) + nfds * sizeof (struct file *)) - 739 control->m_len; 740 if (neededspace > M_TRAILINGSPACE(control)) { 741 742 /* if we already have a cluster, the message is just too big */ 743 if (control->m_flags & M_EXT) 744 return (E2BIG); 745 746 /* allocate a cluster and try again */ 747 MCLGET(control, M_WAIT); 748 if ((control->m_flags & M_EXT) == 0) 749 return (ENOBUFS); /* allocation failed */ 750 751 /* copy the data to the cluster */ 752 bcopy(cm, mtod(control, char *), cm->cmsg_len); 753 cm = mtod(control, struct cmsghdr *); 754 goto morespace; 755 } 756 757 /* adjust message & mbuf to note amount of space actually used. */ 758 cm->cmsg_len += neededspace; 759 control->m_len = cm->cmsg_len; 760 761 /* 762 * Transform the file descriptors into struct file pointers, in 763 * reverse order so that if pointers are bigger than ints, the 764 * int won't get until we're done. 765 */ 766 fdp = ((int *)(cm + 1)) + nfds - 1; 767 rp = ((struct file **)ALIGN(cm + 1)) + nfds - 1; 768 for (i = 0; i < nfds; i++) { 769 fp = fdescp->fd_ofiles[*fdp--]; 770 *rp-- = fp; 771 fp->f_count++; 772 fp->f_msgcount++; 773 unp_rights++; 774 } 775 return (0); 776 } 777 778 int unp_defer, unp_gcing; 779 extern struct domain unixdomain; 780 781 void 782 unp_gc() 783 { 784 register struct file *fp, *nextfp; 785 register struct socket *so; 786 struct file **extra_ref, **fpp; 787 int nunref, i; 788 789 if (unp_gcing) 790 return; 791 unp_gcing = 1; 792 unp_defer = 0; 793 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) 794 fp->f_flag &= ~(FMARK|FDEFER); 795 do { 796 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) { 797 if (fp->f_count == 0) 798 continue; 799 if (fp->f_flag & FDEFER) { 800 fp->f_flag &= ~FDEFER; 801 unp_defer--; 802 } else { 803 if (fp->f_flag & FMARK) 804 continue; 805 if (fp->f_count == fp->f_msgcount) 806 continue; 807 fp->f_flag |= FMARK; 808 } 809 if (fp->f_type != DTYPE_SOCKET || 810 (so = (struct socket *)fp->f_data) == 0) 811 continue; 812 if (so->so_proto->pr_domain != &unixdomain || 813 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 814 continue; 815 #ifdef notdef 816 if (so->so_rcv.sb_flags & SB_LOCK) { 817 /* 818 * This is problematical; it's not clear 819 * we need to wait for the sockbuf to be 820 * unlocked (on a uniprocessor, at least), 821 * and it's also not clear what to do 822 * if sbwait returns an error due to receipt 823 * of a signal. If sbwait does return 824 * an error, we'll go into an infinite 825 * loop. Delete all of this for now. 826 */ 827 (void) sbwait(&so->so_rcv); 828 goto restart; 829 } 830 #endif 831 unp_scan(so->so_rcv.sb_mb, unp_mark); 832 } 833 } while (unp_defer); 834 /* 835 * We grab an extra reference to each of the file table entries 836 * that are not otherwise accessible and then free the rights 837 * that are stored in messages on them. 838 * 839 * The bug in the orginal code is a little tricky, so I'll describe 840 * what's wrong with it here. 841 * 842 * It is incorrect to simply unp_discard each entry for f_msgcount 843 * times -- consider the case of sockets A and B that contain 844 * references to each other. On a last close of some other socket, 845 * we trigger a gc since the number of outstanding rights (unp_rights) 846 * is non-zero. If during the sweep phase the gc code un_discards, 847 * we end up doing a (full) closef on the descriptor. A closef on A 848 * results in the following chain. Closef calls soo_close, which 849 * calls soclose. Soclose calls first (through the switch 850 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 851 * returns because the previous instance had set unp_gcing, and 852 * we return all the way back to soclose, which marks the socket 853 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 854 * to free up the rights that are queued in messages on the socket A, 855 * i.e., the reference on B. The sorflush calls via the dom_dispose 856 * switch unp_dispose, which unp_scans with unp_discard. This second 857 * instance of unp_discard just calls closef on B. 858 * 859 * Well, a similar chain occurs on B, resulting in a sorflush on B, 860 * which results in another closef on A. Unfortunately, A is already 861 * being closed, and the descriptor has already been marked with 862 * SS_NOFDREF, and soclose panics at this point. 863 * 864 * Here, we first take an extra reference to each inaccessible 865 * descriptor. Then, we call sorflush ourself, since we know 866 * it is a Unix domain socket anyhow. After we destroy all the 867 * rights carried in messages, we do a last closef to get rid 868 * of our extra reference. This is the last close, and the 869 * unp_detach etc will shut down the socket. 870 * 871 * 91/09/19, bsy@cs.cmu.edu 872 */ 873 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); 874 for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0; 875 fp = nextfp) { 876 nextfp = fp->f_list.le_next; 877 if (fp->f_count == 0) 878 continue; 879 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { 880 *fpp++ = fp; 881 nunref++; 882 fp->f_count++; 883 } 884 } 885 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 886 sorflush((struct socket *)(*fpp)->f_data); 887 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 888 (void) closef(*fpp, (struct proc *)0); 889 free((caddr_t)extra_ref, M_FILE); 890 unp_gcing = 0; 891 } 892 893 void 894 unp_dispose(m) 895 struct mbuf *m; 896 { 897 898 if (m) 899 unp_scan(m, unp_discard); 900 } 901 902 void 903 unp_scan(m0, op) 904 register struct mbuf *m0; 905 void (*op) __P((struct file *)); 906 { 907 register struct mbuf *m; 908 register struct file **rp; 909 register struct cmsghdr *cm; 910 register int i; 911 int qfds; 912 913 while (m0) { 914 for (m = m0; m; m = m->m_next) 915 if (m->m_type == MT_CONTROL && 916 m->m_len >= sizeof(*cm)) { 917 cm = mtod(m, struct cmsghdr *); 918 if (cm->cmsg_level != SOL_SOCKET || 919 cm->cmsg_type != SCM_RIGHTS) 920 continue; 921 qfds = (cm->cmsg_len - sizeof *cm) 922 / sizeof (struct file *); 923 rp = (struct file **)(cm + 1); 924 for (i = 0; i < qfds; i++) 925 (*op)(*rp++); 926 break; /* XXX, but saves time */ 927 } 928 m0 = m0->m_act; 929 } 930 } 931 932 void 933 unp_mark(fp) 934 struct file *fp; 935 { 936 937 if (fp->f_flag & FMARK) 938 return; 939 unp_defer++; 940 fp->f_flag |= (FMARK|FDEFER); 941 } 942 943 void 944 unp_discard(fp) 945 struct file *fp; 946 { 947 948 fp->f_msgcount--; 949 unp_rights--; 950 (void) closef(fp, (struct proc *)0); 951 } 952