1 /* $NetBSD: uipc_usrreq.c,v 1.24 1997/04/10 01:51:21 cgd Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Christopher G. Demetriou. All rights reserved. 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 37 */ 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/proc.h> 42 #include <sys/filedesc.h> 43 #include <sys/domain.h> 44 #include <sys/protosw.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/unpcb.h> 48 #include <sys/un.h> 49 #include <sys/namei.h> 50 #include <sys/vnode.h> 51 #include <sys/file.h> 52 #include <sys/stat.h> 53 #include <sys/mbuf.h> 54 55 /* 56 * Unix communications domain. 57 * 58 * TODO: 59 * SEQPACKET, RDM 60 * rethink name space problems 61 * need a proper out-of-band 62 */ 63 struct sockaddr_un sun_noname = { sizeof(sun_noname), AF_UNIX }; 64 ino_t unp_ino; /* prototype for fake inode numbers */ 65 66 int 67 unp_output(m, control, unp) 68 struct mbuf *m, *control; 69 struct unpcb *unp; 70 { 71 struct socket *so2; 72 struct sockaddr_un *sun; 73 74 so2 = unp->unp_conn->unp_socket; 75 if (unp->unp_addr) 76 sun = unp->unp_addr; 77 else 78 sun = &sun_noname; 79 if (sbappendaddr(&so2->so_rcv, (struct sockaddr *)sun, m, 80 control) == 0) { 81 m_freem(control); 82 m_freem(m); 83 return (EINVAL); 84 } else { 85 sorwakeup(so2); 86 return (0); 87 } 88 } 89 90 void 91 unp_setsockaddr(unp, nam) 92 register struct unpcb *unp; 93 struct mbuf *nam; 94 { 95 struct sockaddr_un *sun; 96 97 if (unp->unp_addr) 98 sun = unp->unp_addr; 99 else 100 sun = &sun_noname; 101 nam->m_len = sun->sun_len; 102 bcopy(sun, mtod(nam, caddr_t), (size_t)nam->m_len); 103 } 104 105 void 106 unp_setpeeraddr(unp, nam) 107 register struct unpcb *unp; 108 struct mbuf *nam; 109 { 110 struct sockaddr_un *sun; 111 112 if (unp->unp_conn && unp->unp_conn->unp_addr) 113 sun = unp->unp_conn->unp_addr; 114 else 115 sun = &sun_noname; 116 nam->m_len = sun->sun_len; 117 bcopy(sun, mtod(nam, caddr_t), (size_t)nam->m_len); 118 } 119 120 /*ARGSUSED*/ 121 int 122 uipc_usrreq(so, req, m, nam, control, p) 123 struct socket *so; 124 int req; 125 struct mbuf *m, *nam, *control; 126 struct proc *p; 127 { 128 struct unpcb *unp = sotounpcb(so); 129 register struct socket *so2; 130 register int error = 0; 131 132 if (req == PRU_CONTROL) 133 return (EOPNOTSUPP); 134 135 #ifdef DIAGNOSTIC 136 if (req != PRU_SEND && req != PRU_SENDOOB && control) 137 panic("uipc_usrreq: unexpected control mbuf"); 138 #endif 139 if (unp == 0 && req != PRU_ATTACH) { 140 error = EINVAL; 141 goto release; 142 } 143 144 switch (req) { 145 146 case PRU_ATTACH: 147 if (unp != 0) { 148 error = EISCONN; 149 break; 150 } 151 error = unp_attach(so); 152 break; 153 154 case PRU_DETACH: 155 unp_detach(unp); 156 break; 157 158 case PRU_BIND: 159 error = unp_bind(unp, nam, p); 160 break; 161 162 case PRU_LISTEN: 163 if (unp->unp_vnode == 0) 164 error = EINVAL; 165 break; 166 167 case PRU_CONNECT: 168 error = unp_connect(so, nam, p); 169 break; 170 171 case PRU_CONNECT2: 172 error = unp_connect2(so, (struct socket *)nam); 173 break; 174 175 case PRU_DISCONNECT: 176 unp_disconnect(unp); 177 break; 178 179 case PRU_ACCEPT: 180 unp_setpeeraddr(unp, nam); 181 break; 182 183 case PRU_SHUTDOWN: 184 socantsendmore(so); 185 unp_shutdown(unp); 186 break; 187 188 case PRU_RCVD: 189 switch (so->so_type) { 190 191 case SOCK_DGRAM: 192 panic("uipc 1"); 193 /*NOTREACHED*/ 194 195 case SOCK_STREAM: 196 #define rcv (&so->so_rcv) 197 #define snd (&so2->so_snd) 198 if (unp->unp_conn == 0) 199 break; 200 so2 = unp->unp_conn->unp_socket; 201 /* 202 * Adjust backpressure on sender 203 * and wakeup any waiting to write. 204 */ 205 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; 206 unp->unp_mbcnt = rcv->sb_mbcnt; 207 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; 208 unp->unp_cc = rcv->sb_cc; 209 sowwakeup(so2); 210 #undef snd 211 #undef rcv 212 break; 213 214 default: 215 panic("uipc 2"); 216 } 217 break; 218 219 case PRU_SEND: 220 if (control && (error = unp_internalize(control, p))) 221 break; 222 switch (so->so_type) { 223 224 case SOCK_DGRAM: { 225 if (nam) { 226 if ((so->so_state & SS_ISCONNECTED) != 0) { 227 error = EISCONN; 228 goto die; 229 } 230 error = unp_connect(so, nam, p); 231 if (error) { 232 die: 233 m_freem(control); 234 m_freem(m); 235 break; 236 } 237 } else { 238 if ((so->so_state & SS_ISCONNECTED) == 0) { 239 error = ENOTCONN; 240 goto die; 241 } 242 } 243 error = unp_output(m, control, unp); 244 if (nam) 245 unp_disconnect(unp); 246 break; 247 } 248 249 case SOCK_STREAM: 250 #define rcv (&so2->so_rcv) 251 #define snd (&so->so_snd) 252 if (unp->unp_conn == 0) 253 panic("uipc 3"); 254 so2 = unp->unp_conn->unp_socket; 255 /* 256 * Send to paired receive port, and then reduce 257 * send buffer hiwater marks to maintain backpressure. 258 * Wake up readers. 259 */ 260 if (control) { 261 if (sbappendcontrol(rcv, m, control) == 0) 262 m_freem(control); 263 } else 264 sbappend(rcv, m); 265 snd->sb_mbmax -= 266 rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; 267 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; 268 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; 269 unp->unp_conn->unp_cc = rcv->sb_cc; 270 sorwakeup(so2); 271 #undef snd 272 #undef rcv 273 break; 274 275 default: 276 panic("uipc 4"); 277 } 278 break; 279 280 case PRU_ABORT: 281 unp_drop(unp, ECONNABORTED); 282 break; 283 284 case PRU_SENSE: 285 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 286 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { 287 so2 = unp->unp_conn->unp_socket; 288 ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc; 289 } 290 ((struct stat *) m)->st_dev = NODEV; 291 if (unp->unp_ino == 0) 292 unp->unp_ino = unp_ino++; 293 ((struct stat *) m)->st_ino = unp->unp_ino; 294 return (0); 295 296 case PRU_RCVOOB: 297 error = EOPNOTSUPP; 298 break; 299 300 case PRU_SENDOOB: 301 m_freem(control); 302 m_freem(m); 303 error = EOPNOTSUPP; 304 break; 305 306 case PRU_SOCKADDR: 307 unp_setsockaddr(unp, nam); 308 break; 309 310 case PRU_PEERADDR: 311 unp_setpeeraddr(unp, nam); 312 break; 313 314 default: 315 panic("piusrreq"); 316 } 317 318 release: 319 return (error); 320 } 321 322 /* 323 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 324 * for stream sockets, although the total for sender and receiver is 325 * actually only PIPSIZ. 326 * Datagram sockets really use the sendspace as the maximum datagram size, 327 * and don't really want to reserve the sendspace. Their recvspace should 328 * be large enough for at least one max-size datagram plus address. 329 */ 330 #define PIPSIZ 4096 331 u_long unpst_sendspace = PIPSIZ; 332 u_long unpst_recvspace = PIPSIZ; 333 u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 334 u_long unpdg_recvspace = 4*1024; 335 336 int unp_rights; /* file descriptors in flight */ 337 338 int 339 unp_attach(so) 340 struct socket *so; 341 { 342 register struct unpcb *unp; 343 int error; 344 345 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 346 switch (so->so_type) { 347 348 case SOCK_STREAM: 349 error = soreserve(so, unpst_sendspace, unpst_recvspace); 350 break; 351 352 case SOCK_DGRAM: 353 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 354 break; 355 356 default: 357 panic("unp_attach"); 358 } 359 if (error) 360 return (error); 361 } 362 unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT); 363 if (unp == NULL) 364 return (ENOBUFS); 365 bzero((caddr_t)unp, sizeof(*unp)); 366 unp->unp_socket = so; 367 so->so_pcb = unp; 368 return (0); 369 } 370 371 void 372 unp_detach(unp) 373 register struct unpcb *unp; 374 { 375 376 if (unp->unp_vnode) { 377 unp->unp_vnode->v_socket = 0; 378 vrele(unp->unp_vnode); 379 unp->unp_vnode = 0; 380 } 381 if (unp->unp_conn) 382 unp_disconnect(unp); 383 while (unp->unp_refs) 384 unp_drop(unp->unp_refs, ECONNRESET); 385 soisdisconnected(unp->unp_socket); 386 unp->unp_socket->so_pcb = 0; 387 if (unp->unp_addr) 388 m_freem(dtom(unp->unp_addr)); 389 if (unp_rights) { 390 /* 391 * Normally the receive buffer is flushed later, 392 * in sofree, but if our receive buffer holds references 393 * to descriptors that are now garbage, we will dispose 394 * of those descriptor references after the garbage collector 395 * gets them (resulting in a "panic: closef: count < 0"). 396 */ 397 sorflush(unp->unp_socket); 398 free(unp, M_PCB); 399 unp_gc(); 400 } else 401 free(unp, M_PCB); 402 } 403 404 int 405 unp_bind(unp, nam, p) 406 struct unpcb *unp; 407 struct mbuf *nam; 408 struct proc *p; 409 { 410 struct sockaddr_un *sun = mtod(nam, struct sockaddr_un *); 411 register struct vnode *vp; 412 struct vattr vattr; 413 int error; 414 struct nameidata nd; 415 416 if (unp->unp_vnode != 0) 417 return (EINVAL); 418 NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE, 419 sun->sun_path, p); 420 if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) { /* XXX */ 421 if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0) 422 return (EINVAL); 423 } else 424 *(mtod(nam, caddr_t) + nam->m_len) = 0; 425 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 426 if ((error = namei(&nd)) != 0) 427 return (error); 428 vp = nd.ni_vp; 429 if (vp != NULL) { 430 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 431 if (nd.ni_dvp == vp) 432 vrele(nd.ni_dvp); 433 else 434 vput(nd.ni_dvp); 435 vrele(vp); 436 return (EADDRINUSE); 437 } 438 VATTR_NULL(&vattr); 439 vattr.va_type = VSOCK; 440 vattr.va_mode = ACCESSPERMS; 441 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); 442 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 443 if (error) 444 return (error); 445 vp = nd.ni_vp; 446 vp->v_socket = unp->unp_socket; 447 unp->unp_vnode = vp; 448 unp->unp_addr = 449 mtod(m_copy(nam, 0, (int)M_COPYALL), struct sockaddr_un *); 450 VOP_UNLOCK(vp); 451 return (0); 452 } 453 454 int 455 unp_connect(so, nam, p) 456 struct socket *so; 457 struct mbuf *nam; 458 struct proc *p; 459 { 460 register struct sockaddr_un *sun = mtod(nam, struct sockaddr_un *); 461 register struct vnode *vp; 462 register struct socket *so2, *so3; 463 struct unpcb *unp2, *unp3; 464 int error; 465 struct nameidata nd; 466 467 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, sun->sun_path, p); 468 if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) { /* XXX */ 469 if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0) 470 return (EINVAL); 471 } else 472 *(mtod(nam, caddr_t) + nam->m_len) = 0; 473 if ((error = namei(&nd)) != 0) 474 return (error); 475 vp = nd.ni_vp; 476 if (vp->v_type != VSOCK) { 477 error = ENOTSOCK; 478 goto bad; 479 } 480 if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0) 481 goto bad; 482 so2 = vp->v_socket; 483 if (so2 == 0) { 484 error = ECONNREFUSED; 485 goto bad; 486 } 487 if (so->so_type != so2->so_type) { 488 error = EPROTOTYPE; 489 goto bad; 490 } 491 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 492 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 493 (so3 = sonewconn(so2, 0)) == 0) { 494 error = ECONNREFUSED; 495 goto bad; 496 } 497 unp2 = sotounpcb(so2); 498 unp3 = sotounpcb(so3); 499 if (unp2->unp_addr) 500 unp3->unp_addr = mtod(m_copy(dtom(unp2->unp_addr), 0, 501 (int)M_COPYALL), struct sockaddr_un *); 502 so2 = so3; 503 } 504 error = unp_connect2(so, so2); 505 bad: 506 vput(vp); 507 return (error); 508 } 509 510 int 511 unp_connect2(so, so2) 512 register struct socket *so; 513 register struct socket *so2; 514 { 515 register struct unpcb *unp = sotounpcb(so); 516 register struct unpcb *unp2; 517 518 if (so2->so_type != so->so_type) 519 return (EPROTOTYPE); 520 unp2 = sotounpcb(so2); 521 unp->unp_conn = unp2; 522 switch (so->so_type) { 523 524 case SOCK_DGRAM: 525 unp->unp_nextref = unp2->unp_refs; 526 unp2->unp_refs = unp; 527 soisconnected(so); 528 break; 529 530 case SOCK_STREAM: 531 unp2->unp_conn = unp; 532 soisconnected(so); 533 soisconnected(so2); 534 break; 535 536 default: 537 panic("unp_connect2"); 538 } 539 return (0); 540 } 541 542 void 543 unp_disconnect(unp) 544 struct unpcb *unp; 545 { 546 register struct unpcb *unp2 = unp->unp_conn; 547 548 if (unp2 == 0) 549 return; 550 unp->unp_conn = 0; 551 switch (unp->unp_socket->so_type) { 552 553 case SOCK_DGRAM: 554 if (unp2->unp_refs == unp) 555 unp2->unp_refs = unp->unp_nextref; 556 else { 557 unp2 = unp2->unp_refs; 558 for (;;) { 559 if (unp2 == 0) 560 panic("unp_disconnect"); 561 if (unp2->unp_nextref == unp) 562 break; 563 unp2 = unp2->unp_nextref; 564 } 565 unp2->unp_nextref = unp->unp_nextref; 566 } 567 unp->unp_nextref = 0; 568 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 569 break; 570 571 case SOCK_STREAM: 572 soisdisconnected(unp->unp_socket); 573 unp2->unp_conn = 0; 574 soisdisconnected(unp2->unp_socket); 575 break; 576 } 577 } 578 579 #ifdef notdef 580 unp_abort(unp) 581 struct unpcb *unp; 582 { 583 584 unp_detach(unp); 585 } 586 #endif 587 588 void 589 unp_shutdown(unp) 590 struct unpcb *unp; 591 { 592 struct socket *so; 593 594 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 595 (so = unp->unp_conn->unp_socket)) 596 socantrcvmore(so); 597 } 598 599 void 600 unp_drop(unp, errno) 601 struct unpcb *unp; 602 int errno; 603 { 604 struct socket *so = unp->unp_socket; 605 606 so->so_error = errno; 607 unp_disconnect(unp); 608 if (so->so_head) { 609 so->so_pcb = 0; 610 sofree(so); 611 if (unp->unp_addr) 612 m_freem(dtom(unp->unp_addr)); 613 free(unp, M_PCB); 614 } 615 } 616 617 #ifdef notdef 618 unp_drain() 619 { 620 621 } 622 #endif 623 624 int 625 unp_externalize(rights) 626 struct mbuf *rights; 627 { 628 struct proc *p = curproc; /* XXX */ 629 register struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 630 register int i, *fdp = (int *)(cm + 1); 631 register struct file **rp = (struct file **)ALIGN(cm + 1); 632 register struct file *fp; 633 int nfds = (cm->cmsg_len - ALIGN(sizeof(*cm))) / sizeof (struct file *); 634 int f; 635 636 /* Make sure that the recipient has space */ 637 if (!fdavail(p, nfds)) { 638 for (i = 0; i < nfds; i++) { 639 fp = *rp; 640 unp_discard(fp); 641 *rp++ = 0; 642 } 643 return (EMSGSIZE); 644 } 645 646 /* 647 * Add file to the recipient's open file table, converting them 648 * to integer file descriptors as we go. Done in forward order 649 * because an integer will always come in the same place or before 650 * its corresponding struct file pointer. 651 */ 652 for (i = 0; i < nfds; i++) { 653 if (fdalloc(p, 0, &f)) 654 panic("unp_externalize"); 655 fp = *rp; 656 p->p_fd->fd_ofiles[f] = fp; 657 fp->f_msgcount--; 658 unp_rights--; 659 *fdp++ = f; 660 } 661 662 /* 663 * Adjust length, in case of transition from large struct file 664 * pointers to ints. 665 */ 666 cm->cmsg_len = sizeof(*cm) + (nfds * sizeof(int)); 667 rights->m_len = cm->cmsg_len; 668 return (0); 669 } 670 671 int 672 unp_internalize(control, p) 673 struct mbuf *control; 674 struct proc *p; 675 { 676 struct filedesc *fdescp = p->p_fd; 677 register struct cmsghdr *cm = mtod(control, struct cmsghdr *); 678 register struct file **rp; 679 register struct file *fp; 680 register int i, fd, *fdp; 681 int nfds; 682 u_int neededspace; 683 684 /* Sanity check the control message header */ 685 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || 686 cm->cmsg_len != control->m_len) 687 return (EINVAL); 688 689 /* Verify that the file descriptors are valid */ 690 nfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); 691 fdp = (int *)(cm + 1); 692 for (i = 0; i < nfds; i++) { 693 fd = *fdp++; 694 if ((unsigned)fd >= fdescp->fd_nfiles || 695 fdescp->fd_ofiles[fd] == NULL) 696 return (EBADF); 697 } 698 699 /* Make sure we have room for the struct file pointers */ 700 morespace: 701 neededspace = (ALIGN(sizeof (*cm)) + nfds * sizeof (struct file *)) - 702 control->m_len; 703 if (neededspace > M_TRAILINGSPACE(control)) { 704 705 /* if we already have a cluster, the message is just too big */ 706 if (control->m_flags & M_EXT) 707 return (E2BIG); 708 709 /* allocate a cluster and try again */ 710 MCLGET(control, M_WAIT); 711 if ((control->m_flags & M_EXT) == 0) 712 return (ENOBUFS); /* allocation failed */ 713 714 /* copy the data to the cluster */ 715 bcopy(cm, mtod(control, char *), cm->cmsg_len); 716 cm = mtod(control, struct cmsghdr *); 717 goto morespace; 718 } 719 720 /* adjust message & mbuf to note amount of space actually used. */ 721 cm->cmsg_len += neededspace; 722 control->m_len = cm->cmsg_len; 723 724 /* 725 * Transform the file descriptors into struct file pointers, in 726 * reverse order so that if pointers are bigger than ints, the 727 * int won't get until we're done. 728 */ 729 fdp = ((int *)(cm + 1)) + nfds - 1; 730 rp = ((struct file **)ALIGN(cm + 1)) + nfds - 1; 731 for (i = 0; i < nfds; i++) { 732 fp = fdescp->fd_ofiles[*fdp]; 733 *rp-- = fp; 734 fp->f_count++; 735 fp->f_msgcount++; 736 unp_rights++; 737 } 738 return (0); 739 } 740 741 int unp_defer, unp_gcing; 742 extern struct domain unixdomain; 743 744 void 745 unp_gc() 746 { 747 register struct file *fp, *nextfp; 748 register struct socket *so; 749 struct file **extra_ref, **fpp; 750 int nunref, i; 751 752 if (unp_gcing) 753 return; 754 unp_gcing = 1; 755 unp_defer = 0; 756 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) 757 fp->f_flag &= ~(FMARK|FDEFER); 758 do { 759 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) { 760 if (fp->f_count == 0) 761 continue; 762 if (fp->f_flag & FDEFER) { 763 fp->f_flag &= ~FDEFER; 764 unp_defer--; 765 } else { 766 if (fp->f_flag & FMARK) 767 continue; 768 if (fp->f_count == fp->f_msgcount) 769 continue; 770 fp->f_flag |= FMARK; 771 } 772 if (fp->f_type != DTYPE_SOCKET || 773 (so = (struct socket *)fp->f_data) == 0) 774 continue; 775 if (so->so_proto->pr_domain != &unixdomain || 776 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 777 continue; 778 #ifdef notdef 779 if (so->so_rcv.sb_flags & SB_LOCK) { 780 /* 781 * This is problematical; it's not clear 782 * we need to wait for the sockbuf to be 783 * unlocked (on a uniprocessor, at least), 784 * and it's also not clear what to do 785 * if sbwait returns an error due to receipt 786 * of a signal. If sbwait does return 787 * an error, we'll go into an infinite 788 * loop. Delete all of this for now. 789 */ 790 (void) sbwait(&so->so_rcv); 791 goto restart; 792 } 793 #endif 794 unp_scan(so->so_rcv.sb_mb, unp_mark); 795 } 796 } while (unp_defer); 797 /* 798 * We grab an extra reference to each of the file table entries 799 * that are not otherwise accessible and then free the rights 800 * that are stored in messages on them. 801 * 802 * The bug in the orginal code is a little tricky, so I'll describe 803 * what's wrong with it here. 804 * 805 * It is incorrect to simply unp_discard each entry for f_msgcount 806 * times -- consider the case of sockets A and B that contain 807 * references to each other. On a last close of some other socket, 808 * we trigger a gc since the number of outstanding rights (unp_rights) 809 * is non-zero. If during the sweep phase the gc code un_discards, 810 * we end up doing a (full) closef on the descriptor. A closef on A 811 * results in the following chain. Closef calls soo_close, which 812 * calls soclose. Soclose calls first (through the switch 813 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 814 * returns because the previous instance had set unp_gcing, and 815 * we return all the way back to soclose, which marks the socket 816 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 817 * to free up the rights that are queued in messages on the socket A, 818 * i.e., the reference on B. The sorflush calls via the dom_dispose 819 * switch unp_dispose, which unp_scans with unp_discard. This second 820 * instance of unp_discard just calls closef on B. 821 * 822 * Well, a similar chain occurs on B, resulting in a sorflush on B, 823 * which results in another closef on A. Unfortunately, A is already 824 * being closed, and the descriptor has already been marked with 825 * SS_NOFDREF, and soclose panics at this point. 826 * 827 * Here, we first take an extra reference to each inaccessible 828 * descriptor. Then, we call sorflush ourself, since we know 829 * it is a Unix domain socket anyhow. After we destroy all the 830 * rights carried in messages, we do a last closef to get rid 831 * of our extra reference. This is the last close, and the 832 * unp_detach etc will shut down the socket. 833 * 834 * 91/09/19, bsy@cs.cmu.edu 835 */ 836 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); 837 for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0; 838 fp = nextfp) { 839 nextfp = fp->f_list.le_next; 840 if (fp->f_count == 0) 841 continue; 842 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { 843 *fpp++ = fp; 844 nunref++; 845 fp->f_count++; 846 } 847 } 848 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 849 sorflush((struct socket *)(*fpp)->f_data); 850 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 851 (void) closef(*fpp, (struct proc *)0); 852 free((caddr_t)extra_ref, M_FILE); 853 unp_gcing = 0; 854 } 855 856 void 857 unp_dispose(m) 858 struct mbuf *m; 859 { 860 861 if (m) 862 unp_scan(m, unp_discard); 863 } 864 865 void 866 unp_scan(m0, op) 867 register struct mbuf *m0; 868 void (*op) __P((struct file *)); 869 { 870 register struct mbuf *m; 871 register struct file **rp; 872 register struct cmsghdr *cm; 873 register int i; 874 int qfds; 875 876 while (m0) { 877 for (m = m0; m; m = m->m_next) 878 if (m->m_type == MT_CONTROL && 879 m->m_len >= sizeof(*cm)) { 880 cm = mtod(m, struct cmsghdr *); 881 if (cm->cmsg_level != SOL_SOCKET || 882 cm->cmsg_type != SCM_RIGHTS) 883 continue; 884 qfds = (cm->cmsg_len - sizeof *cm) 885 / sizeof (struct file *); 886 rp = (struct file **)(cm + 1); 887 for (i = 0; i < qfds; i++) 888 (*op)(*rp++); 889 break; /* XXX, but saves time */ 890 } 891 m0 = m0->m_act; 892 } 893 } 894 895 void 896 unp_mark(fp) 897 struct file *fp; 898 { 899 900 if (fp->f_flag & FMARK) 901 return; 902 unp_defer++; 903 fp->f_flag |= (FMARK|FDEFER); 904 } 905 906 void 907 unp_discard(fp) 908 struct file *fp; 909 { 910 911 fp->f_msgcount--; 912 unp_rights--; 913 (void) closef(fp, (struct proc *)0); 914 } 915