1 /* $NetBSD: uipc_usrreq.c,v 1.30 1998/01/07 22:57:09 thorpej Exp $ */ 2 3 /*- 4 * Copyright (c) 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1997 Christopher G. Demetriou. All rights reserved. 42 * Copyright (c) 1982, 1986, 1989, 1991, 1993 43 * The Regents of the University of California. All rights reserved. 44 * 45 * Redistribution and use in source and binary forms, with or without 46 * modification, are permitted provided that the following conditions 47 * are met: 48 * 1. Redistributions of source code must retain the above copyright 49 * notice, this list of conditions and the following disclaimer. 50 * 2. Redistributions in binary form must reproduce the above copyright 51 * notice, this list of conditions and the following disclaimer in the 52 * documentation and/or other materials provided with the distribution. 53 * 3. All advertising materials mentioning features or use of this software 54 * must display the following acknowledgement: 55 * This product includes software developed by the University of 56 * California, Berkeley and its contributors. 57 * 4. Neither the name of the University nor the names of its contributors 58 * may be used to endorse or promote products derived from this software 59 * without specific prior written permission. 60 * 61 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 62 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 64 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 66 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 67 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 69 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 70 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 71 * SUCH DAMAGE. 72 * 73 * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 74 */ 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/proc.h> 79 #include <sys/filedesc.h> 80 #include <sys/domain.h> 81 #include <sys/protosw.h> 82 #include <sys/socket.h> 83 #include <sys/socketvar.h> 84 #include <sys/unpcb.h> 85 #include <sys/un.h> 86 #include <sys/namei.h> 87 #include <sys/vnode.h> 88 #include <sys/file.h> 89 #include <sys/stat.h> 90 #include <sys/mbuf.h> 91 92 /* 93 * Unix communications domain. 94 * 95 * TODO: 96 * SEQPACKET, RDM 97 * rethink name space problems 98 * need a proper out-of-band 99 */ 100 struct sockaddr_un sun_noname = { sizeof(sun_noname), AF_UNIX }; 101 ino_t unp_ino; /* prototype for fake inode numbers */ 102 103 struct mbuf *unp_addsockcred __P((struct proc *, struct mbuf *)); 104 105 int 106 unp_output(m, control, unp, p) 107 struct mbuf *m, *control; 108 struct unpcb *unp; 109 struct proc *p; 110 { 111 struct socket *so2; 112 struct sockaddr_un *sun; 113 114 so2 = unp->unp_conn->unp_socket; 115 if (unp->unp_addr) 116 sun = unp->unp_addr; 117 else 118 sun = &sun_noname; 119 if (unp->unp_conn->unp_flags & UNP_WANTCRED) 120 control = unp_addsockcred(p, control); 121 if (sbappendaddr(&so2->so_rcv, (struct sockaddr *)sun, m, 122 control) == 0) { 123 m_freem(control); 124 m_freem(m); 125 return (EINVAL); 126 } else { 127 sorwakeup(so2); 128 return (0); 129 } 130 } 131 132 void 133 unp_setsockaddr(unp, nam) 134 register struct unpcb *unp; 135 struct mbuf *nam; 136 { 137 struct sockaddr_un *sun; 138 139 if (unp->unp_addr) 140 sun = unp->unp_addr; 141 else 142 sun = &sun_noname; 143 nam->m_len = sun->sun_len; 144 if (nam->m_len > MLEN) 145 MEXTMALLOC(nam, nam->m_len, M_WAITOK); 146 bcopy(sun, mtod(nam, caddr_t), (size_t)nam->m_len); 147 } 148 149 void 150 unp_setpeeraddr(unp, nam) 151 register struct unpcb *unp; 152 struct mbuf *nam; 153 { 154 struct sockaddr_un *sun; 155 156 if (unp->unp_conn && unp->unp_conn->unp_addr) 157 sun = unp->unp_conn->unp_addr; 158 else 159 sun = &sun_noname; 160 nam->m_len = sun->sun_len; 161 if (nam->m_len > MLEN) 162 MEXTMALLOC(nam, nam->m_len, M_WAITOK); 163 bcopy(sun, mtod(nam, caddr_t), (size_t)nam->m_len); 164 } 165 166 /*ARGSUSED*/ 167 int 168 uipc_usrreq(so, req, m, nam, control, p) 169 struct socket *so; 170 int req; 171 struct mbuf *m, *nam, *control; 172 struct proc *p; 173 { 174 struct unpcb *unp = sotounpcb(so); 175 register struct socket *so2; 176 register int error = 0; 177 178 if (req == PRU_CONTROL) 179 return (EOPNOTSUPP); 180 181 #ifdef DIAGNOSTIC 182 if (req != PRU_SEND && req != PRU_SENDOOB && control) 183 panic("uipc_usrreq: unexpected control mbuf"); 184 #endif 185 if (unp == 0 && req != PRU_ATTACH) { 186 error = EINVAL; 187 goto release; 188 } 189 190 switch (req) { 191 192 case PRU_ATTACH: 193 if (unp != 0) { 194 error = EISCONN; 195 break; 196 } 197 error = unp_attach(so); 198 break; 199 200 case PRU_DETACH: 201 unp_detach(unp); 202 break; 203 204 case PRU_BIND: 205 error = unp_bind(unp, nam, p); 206 break; 207 208 case PRU_LISTEN: 209 if (unp->unp_vnode == 0) 210 error = EINVAL; 211 break; 212 213 case PRU_CONNECT: 214 error = unp_connect(so, nam, p); 215 break; 216 217 case PRU_CONNECT2: 218 error = unp_connect2(so, (struct socket *)nam); 219 break; 220 221 case PRU_DISCONNECT: 222 unp_disconnect(unp); 223 break; 224 225 case PRU_ACCEPT: 226 unp_setpeeraddr(unp, nam); 227 break; 228 229 case PRU_SHUTDOWN: 230 socantsendmore(so); 231 unp_shutdown(unp); 232 break; 233 234 case PRU_RCVD: 235 switch (so->so_type) { 236 237 case SOCK_DGRAM: 238 panic("uipc 1"); 239 /*NOTREACHED*/ 240 241 case SOCK_STREAM: 242 #define rcv (&so->so_rcv) 243 #define snd (&so2->so_snd) 244 if (unp->unp_conn == 0) 245 break; 246 so2 = unp->unp_conn->unp_socket; 247 /* 248 * Adjust backpressure on sender 249 * and wakeup any waiting to write. 250 */ 251 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; 252 unp->unp_mbcnt = rcv->sb_mbcnt; 253 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; 254 unp->unp_cc = rcv->sb_cc; 255 sowwakeup(so2); 256 #undef snd 257 #undef rcv 258 break; 259 260 default: 261 panic("uipc 2"); 262 } 263 break; 264 265 case PRU_SEND: 266 /* 267 * Note: unp_internalize() rejects any control message 268 * other than SCM_RIGHTS, and only allows one. This 269 * has the side-effect of preventing a caller from 270 * forging SCM_CREDS. 271 */ 272 if (control && (error = unp_internalize(control, p))) 273 break; 274 switch (so->so_type) { 275 276 case SOCK_DGRAM: { 277 if (nam) { 278 if ((so->so_state & SS_ISCONNECTED) != 0) { 279 error = EISCONN; 280 goto die; 281 } 282 error = unp_connect(so, nam, p); 283 if (error) { 284 die: 285 m_freem(control); 286 m_freem(m); 287 break; 288 } 289 } else { 290 if ((so->so_state & SS_ISCONNECTED) == 0) { 291 error = ENOTCONN; 292 goto die; 293 } 294 } 295 error = unp_output(m, control, unp, p); 296 if (nam) 297 unp_disconnect(unp); 298 break; 299 } 300 301 case SOCK_STREAM: 302 #define rcv (&so2->so_rcv) 303 #define snd (&so->so_snd) 304 if (unp->unp_conn == 0) 305 panic("uipc 3"); 306 so2 = unp->unp_conn->unp_socket; 307 if (unp->unp_conn->unp_flags & UNP_WANTCRED) { 308 /* 309 * Credentials are passed only once on 310 * SOCK_STREAM. 311 */ 312 unp->unp_conn->unp_flags &= ~UNP_WANTCRED; 313 control = unp_addsockcred(p, control); 314 } 315 /* 316 * Send to paired receive port, and then reduce 317 * send buffer hiwater marks to maintain backpressure. 318 * Wake up readers. 319 */ 320 if (control) { 321 if (sbappendcontrol(rcv, m, control) == 0) 322 m_freem(control); 323 } else 324 sbappend(rcv, m); 325 snd->sb_mbmax -= 326 rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; 327 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; 328 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; 329 unp->unp_conn->unp_cc = rcv->sb_cc; 330 sorwakeup(so2); 331 #undef snd 332 #undef rcv 333 break; 334 335 default: 336 panic("uipc 4"); 337 } 338 break; 339 340 case PRU_ABORT: 341 unp_drop(unp, ECONNABORTED); 342 break; 343 344 case PRU_SENSE: 345 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 346 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { 347 so2 = unp->unp_conn->unp_socket; 348 ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc; 349 } 350 ((struct stat *) m)->st_dev = NODEV; 351 if (unp->unp_ino == 0) 352 unp->unp_ino = unp_ino++; 353 ((struct stat *) m)->st_atimespec = 354 ((struct stat *) m)->st_mtimespec = 355 ((struct stat *) m)->st_ctimespec = unp->unp_ctime; 356 ((struct stat *) m)->st_ino = unp->unp_ino; 357 return (0); 358 359 case PRU_RCVOOB: 360 error = EOPNOTSUPP; 361 break; 362 363 case PRU_SENDOOB: 364 m_freem(control); 365 m_freem(m); 366 error = EOPNOTSUPP; 367 break; 368 369 case PRU_SOCKADDR: 370 unp_setsockaddr(unp, nam); 371 break; 372 373 case PRU_PEERADDR: 374 unp_setpeeraddr(unp, nam); 375 break; 376 377 default: 378 panic("piusrreq"); 379 } 380 381 release: 382 return (error); 383 } 384 385 /* 386 * Unix domain socket option processing. 387 */ 388 int 389 uipc_ctloutput(op, so, level, optname, mp) 390 int op; 391 struct socket *so; 392 int level, optname; 393 struct mbuf **mp; 394 { 395 struct unpcb *unp = sotounpcb(so); 396 struct mbuf *m = *mp; 397 int optval = 0, error = 0; 398 399 if (level != 0) { 400 error = EINVAL; 401 if (op == PRCO_SETOPT && m) 402 (void) m_free(m); 403 } else switch (op) { 404 405 case PRCO_SETOPT: 406 switch (optname) { 407 case LOCAL_CREDS: 408 if (m == NULL || m->m_len != sizeof(int)) 409 error = EINVAL; 410 else { 411 optval = *mtod(m, int *); 412 switch (optname) { 413 #define OPTSET(bit) \ 414 if (optval) \ 415 unp->unp_flags |= (bit); \ 416 else \ 417 unp->unp_flags &= ~(bit); 418 419 case LOCAL_CREDS: 420 OPTSET(UNP_WANTCRED); 421 break; 422 } 423 } 424 break; 425 #undef OPTSET 426 427 default: 428 error = ENOPROTOOPT; 429 break; 430 } 431 if (m) 432 (void) m_free(m); 433 break; 434 435 case PRCO_GETOPT: 436 switch (optname) { 437 case LOCAL_CREDS: 438 *mp = m = m_get(M_WAIT, MT_SOOPTS); 439 m->m_len = sizeof(int); 440 switch (optname) { 441 442 #define OPTBIT(bit) (unp->unp_flags & (bit) ? 1 : 0) 443 444 case LOCAL_CREDS: 445 optval = OPTBIT(UNP_WANTCRED); 446 break; 447 } 448 *mtod(m, int *) = optval; 449 break; 450 #undef OPTBIT 451 452 default: 453 error = ENOPROTOOPT; 454 break; 455 } 456 break; 457 } 458 return (error); 459 } 460 461 /* 462 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 463 * for stream sockets, although the total for sender and receiver is 464 * actually only PIPSIZ. 465 * Datagram sockets really use the sendspace as the maximum datagram size, 466 * and don't really want to reserve the sendspace. Their recvspace should 467 * be large enough for at least one max-size datagram plus address. 468 */ 469 #define PIPSIZ 4096 470 u_long unpst_sendspace = PIPSIZ; 471 u_long unpst_recvspace = PIPSIZ; 472 u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 473 u_long unpdg_recvspace = 4*1024; 474 475 int unp_rights; /* file descriptors in flight */ 476 477 int 478 unp_attach(so) 479 struct socket *so; 480 { 481 register struct unpcb *unp; 482 struct timeval tv; 483 int error; 484 485 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 486 switch (so->so_type) { 487 488 case SOCK_STREAM: 489 error = soreserve(so, unpst_sendspace, unpst_recvspace); 490 break; 491 492 case SOCK_DGRAM: 493 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 494 break; 495 496 default: 497 panic("unp_attach"); 498 } 499 if (error) 500 return (error); 501 } 502 unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT); 503 if (unp == NULL) 504 return (ENOBUFS); 505 bzero((caddr_t)unp, sizeof(*unp)); 506 unp->unp_socket = so; 507 so->so_pcb = unp; 508 microtime(&tv); 509 TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime); 510 return (0); 511 } 512 513 void 514 unp_detach(unp) 515 register struct unpcb *unp; 516 { 517 518 if (unp->unp_vnode) { 519 unp->unp_vnode->v_socket = 0; 520 vrele(unp->unp_vnode); 521 unp->unp_vnode = 0; 522 } 523 if (unp->unp_conn) 524 unp_disconnect(unp); 525 while (unp->unp_refs) 526 unp_drop(unp->unp_refs, ECONNRESET); 527 soisdisconnected(unp->unp_socket); 528 unp->unp_socket->so_pcb = 0; 529 if (unp->unp_addr) 530 free(unp->unp_addr, M_SONAME); 531 if (unp_rights) { 532 /* 533 * Normally the receive buffer is flushed later, 534 * in sofree, but if our receive buffer holds references 535 * to descriptors that are now garbage, we will dispose 536 * of those descriptor references after the garbage collector 537 * gets them (resulting in a "panic: closef: count < 0"). 538 */ 539 sorflush(unp->unp_socket); 540 free(unp, M_PCB); 541 unp_gc(); 542 } else 543 free(unp, M_PCB); 544 } 545 546 int 547 unp_bind(unp, nam, p) 548 struct unpcb *unp; 549 struct mbuf *nam; 550 struct proc *p; 551 { 552 struct sockaddr_un *sun; 553 register struct vnode *vp; 554 struct vattr vattr; 555 size_t addrlen; 556 int error; 557 struct nameidata nd; 558 559 if (unp->unp_vnode != 0) 560 return (EINVAL); 561 562 /* 563 * Allocate the new sockaddr. We have to allocate one 564 * extra byte so that we can ensure that the pathname 565 * is nul-terminated. 566 */ 567 addrlen = nam->m_len + 1; 568 sun = malloc(addrlen, M_SONAME, M_WAITOK); 569 m_copydata(nam, 0, nam->m_len, (caddr_t)sun); 570 *(((char *)sun) + nam->m_len) = '\0'; 571 572 NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE, 573 sun->sun_path, p); 574 575 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 576 if ((error = namei(&nd)) != 0) 577 goto bad; 578 vp = nd.ni_vp; 579 if (vp != NULL) { 580 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 581 if (nd.ni_dvp == vp) 582 vrele(nd.ni_dvp); 583 else 584 vput(nd.ni_dvp); 585 vrele(vp); 586 error = EADDRINUSE; 587 goto bad; 588 } 589 VATTR_NULL(&vattr); 590 vattr.va_type = VSOCK; 591 vattr.va_mode = ACCESSPERMS; 592 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); 593 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 594 if (error) 595 goto bad; 596 vp = nd.ni_vp; 597 vp->v_socket = unp->unp_socket; 598 unp->unp_vnode = vp; 599 unp->unp_addrlen = addrlen; 600 unp->unp_addr = sun; 601 VOP_UNLOCK(vp); 602 return (0); 603 604 bad: 605 free(sun, M_SONAME); 606 return (error); 607 } 608 609 int 610 unp_connect(so, nam, p) 611 struct socket *so; 612 struct mbuf *nam; 613 struct proc *p; 614 { 615 register struct sockaddr_un *sun; 616 register struct vnode *vp; 617 register struct socket *so2, *so3; 618 struct unpcb *unp2, *unp3; 619 size_t addrlen; 620 int error; 621 struct nameidata nd; 622 623 /* 624 * Allocate a temporary sockaddr. We have to allocate one extra 625 * byte so that we can ensure that the pathname is nul-terminated. 626 * When we establish the connection, we copy the other PCB's 627 * sockaddr to our own. 628 */ 629 addrlen = nam->m_len + 1; 630 sun = malloc(addrlen, M_SONAME, M_WAITOK); 631 m_copydata(nam, 0, nam->m_len, (caddr_t)sun); 632 *(((char *)sun) + nam->m_len) = '\0'; 633 634 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, sun->sun_path, p); 635 636 if ((error = namei(&nd)) != 0) 637 goto bad2; 638 vp = nd.ni_vp; 639 if (vp->v_type != VSOCK) { 640 error = ENOTSOCK; 641 goto bad; 642 } 643 if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0) 644 goto bad; 645 so2 = vp->v_socket; 646 if (so2 == 0) { 647 error = ECONNREFUSED; 648 goto bad; 649 } 650 if (so->so_type != so2->so_type) { 651 error = EPROTOTYPE; 652 goto bad; 653 } 654 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 655 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 656 (so3 = sonewconn(so2, 0)) == 0) { 657 error = ECONNREFUSED; 658 goto bad; 659 } 660 unp2 = sotounpcb(so2); 661 unp3 = sotounpcb(so3); 662 if (unp2->unp_addr) { 663 unp3->unp_addr = malloc(unp2->unp_addrlen, 664 M_SONAME, M_WAITOK); 665 bcopy(unp2->unp_addr, unp3->unp_addr, 666 unp2->unp_addrlen); 667 unp3->unp_addrlen = unp2->unp_addrlen; 668 } 669 unp3->unp_flags = unp2->unp_flags; 670 so2 = so3; 671 } 672 error = unp_connect2(so, so2); 673 bad: 674 vput(vp); 675 bad2: 676 free(sun, M_SONAME); 677 return (error); 678 } 679 680 int 681 unp_connect2(so, so2) 682 register struct socket *so; 683 register struct socket *so2; 684 { 685 register struct unpcb *unp = sotounpcb(so); 686 register struct unpcb *unp2; 687 688 if (so2->so_type != so->so_type) 689 return (EPROTOTYPE); 690 unp2 = sotounpcb(so2); 691 unp->unp_conn = unp2; 692 switch (so->so_type) { 693 694 case SOCK_DGRAM: 695 unp->unp_nextref = unp2->unp_refs; 696 unp2->unp_refs = unp; 697 soisconnected(so); 698 break; 699 700 case SOCK_STREAM: 701 unp2->unp_conn = unp; 702 soisconnected(so); 703 soisconnected(so2); 704 break; 705 706 default: 707 panic("unp_connect2"); 708 } 709 return (0); 710 } 711 712 void 713 unp_disconnect(unp) 714 struct unpcb *unp; 715 { 716 register struct unpcb *unp2 = unp->unp_conn; 717 718 if (unp2 == 0) 719 return; 720 unp->unp_conn = 0; 721 switch (unp->unp_socket->so_type) { 722 723 case SOCK_DGRAM: 724 if (unp2->unp_refs == unp) 725 unp2->unp_refs = unp->unp_nextref; 726 else { 727 unp2 = unp2->unp_refs; 728 for (;;) { 729 if (unp2 == 0) 730 panic("unp_disconnect"); 731 if (unp2->unp_nextref == unp) 732 break; 733 unp2 = unp2->unp_nextref; 734 } 735 unp2->unp_nextref = unp->unp_nextref; 736 } 737 unp->unp_nextref = 0; 738 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 739 break; 740 741 case SOCK_STREAM: 742 soisdisconnected(unp->unp_socket); 743 unp2->unp_conn = 0; 744 soisdisconnected(unp2->unp_socket); 745 break; 746 } 747 } 748 749 #ifdef notdef 750 unp_abort(unp) 751 struct unpcb *unp; 752 { 753 754 unp_detach(unp); 755 } 756 #endif 757 758 void 759 unp_shutdown(unp) 760 struct unpcb *unp; 761 { 762 struct socket *so; 763 764 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 765 (so = unp->unp_conn->unp_socket)) 766 socantrcvmore(so); 767 } 768 769 void 770 unp_drop(unp, errno) 771 struct unpcb *unp; 772 int errno; 773 { 774 struct socket *so = unp->unp_socket; 775 776 so->so_error = errno; 777 unp_disconnect(unp); 778 if (so->so_head) { 779 so->so_pcb = 0; 780 sofree(so); 781 if (unp->unp_addr) 782 free(unp->unp_addr, M_SONAME); 783 free(unp, M_PCB); 784 } 785 } 786 787 #ifdef notdef 788 unp_drain() 789 { 790 791 } 792 #endif 793 794 int 795 unp_externalize(rights) 796 struct mbuf *rights; 797 { 798 struct proc *p = curproc; /* XXX */ 799 register struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 800 register int i, *fdp = (int *)(cm + 1); 801 register struct file **rp = (struct file **)ALIGN(cm + 1); 802 register struct file *fp; 803 int nfds = (cm->cmsg_len - ALIGN(sizeof(*cm))) / sizeof (struct file *); 804 int f; 805 806 /* Make sure that the recipient has space */ 807 if (!fdavail(p, nfds)) { 808 for (i = 0; i < nfds; i++) { 809 fp = *rp; 810 unp_discard(fp); 811 *rp++ = 0; 812 } 813 return (EMSGSIZE); 814 } 815 816 /* 817 * Add file to the recipient's open file table, converting them 818 * to integer file descriptors as we go. Done in forward order 819 * because an integer will always come in the same place or before 820 * its corresponding struct file pointer. 821 */ 822 for (i = 0; i < nfds; i++) { 823 if (fdalloc(p, 0, &f)) 824 panic("unp_externalize"); 825 fp = *rp++; 826 p->p_fd->fd_ofiles[f] = fp; 827 fp->f_msgcount--; 828 unp_rights--; 829 *fdp++ = f; 830 } 831 832 /* 833 * Adjust length, in case of transition from large struct file 834 * pointers to ints. 835 */ 836 cm->cmsg_len = sizeof(*cm) + (nfds * sizeof(int)); 837 rights->m_len = cm->cmsg_len; 838 return (0); 839 } 840 841 int 842 unp_internalize(control, p) 843 struct mbuf *control; 844 struct proc *p; 845 { 846 struct filedesc *fdescp = p->p_fd; 847 register struct cmsghdr *cm = mtod(control, struct cmsghdr *); 848 register struct file **rp; 849 register struct file *fp; 850 register int i, fd, *fdp; 851 int nfds; 852 u_int neededspace; 853 854 /* Sanity check the control message header */ 855 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || 856 cm->cmsg_len != control->m_len) 857 return (EINVAL); 858 859 /* Verify that the file descriptors are valid */ 860 nfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); 861 fdp = (int *)(cm + 1); 862 for (i = 0; i < nfds; i++) { 863 fd = *fdp++; 864 if ((unsigned)fd >= fdescp->fd_nfiles || 865 fdescp->fd_ofiles[fd] == NULL) 866 return (EBADF); 867 } 868 869 /* Make sure we have room for the struct file pointers */ 870 morespace: 871 neededspace = (ALIGN(sizeof (*cm)) + nfds * sizeof (struct file *)) - 872 control->m_len; 873 if (neededspace > M_TRAILINGSPACE(control)) { 874 875 /* if we already have a cluster, the message is just too big */ 876 if (control->m_flags & M_EXT) 877 return (E2BIG); 878 879 /* allocate a cluster and try again */ 880 MCLGET(control, M_WAIT); 881 if ((control->m_flags & M_EXT) == 0) 882 return (ENOBUFS); /* allocation failed */ 883 884 /* copy the data to the cluster */ 885 bcopy(cm, mtod(control, char *), cm->cmsg_len); 886 cm = mtod(control, struct cmsghdr *); 887 goto morespace; 888 } 889 890 /* adjust message & mbuf to note amount of space actually used. */ 891 cm->cmsg_len += neededspace; 892 control->m_len = cm->cmsg_len; 893 894 /* 895 * Transform the file descriptors into struct file pointers, in 896 * reverse order so that if pointers are bigger than ints, the 897 * int won't get until we're done. 898 */ 899 fdp = ((int *)(cm + 1)) + nfds - 1; 900 rp = ((struct file **)ALIGN(cm + 1)) + nfds - 1; 901 for (i = 0; i < nfds; i++) { 902 fp = fdescp->fd_ofiles[*fdp--]; 903 *rp-- = fp; 904 fp->f_count++; 905 fp->f_msgcount++; 906 unp_rights++; 907 } 908 return (0); 909 } 910 911 struct mbuf * 912 unp_addsockcred(p, control) 913 struct proc *p; 914 struct mbuf *control; 915 { 916 struct cmsghdr *cmp; 917 struct sockcred *sc; 918 struct mbuf *m, *n; 919 int len, i; 920 921 len = sizeof(struct cmsghdr) + SOCKCREDSIZE(p->p_ucred->cr_ngroups); 922 923 m = m_get(M_WAIT, MT_CONTROL); 924 if (len > MLEN) { 925 if (len > MCLBYTES) 926 MEXTMALLOC(m, len, M_WAITOK); 927 else 928 MCLGET(m, M_WAIT); 929 if ((m->m_flags & M_EXT) == 0) { 930 m_free(m); 931 return (control); 932 } 933 } 934 935 m->m_len = len; 936 m->m_next = NULL; 937 cmp = mtod(m, struct cmsghdr *); 938 sc = (struct sockcred *)CMSG_DATA(cmp); 939 cmp->cmsg_len = len; 940 cmp->cmsg_level = SOL_SOCKET; 941 cmp->cmsg_type = SCM_CREDS; 942 sc->sc_uid = p->p_cred->p_ruid; 943 sc->sc_euid = p->p_ucred->cr_uid; 944 sc->sc_gid = p->p_cred->p_rgid; 945 sc->sc_egid = p->p_ucred->cr_gid; 946 sc->sc_ngroups = p->p_ucred->cr_ngroups; 947 for (i = 0; i < sc->sc_ngroups; i++) 948 sc->sc_groups[i] = p->p_ucred->cr_groups[i]; 949 950 /* 951 * If a control message already exists, append us to the end. 952 */ 953 if (control != NULL) { 954 for (n = control; n->m_next != NULL; n = n->m_next) 955 ; 956 n->m_next = m; 957 } else 958 control = m; 959 960 return (control); 961 } 962 963 int unp_defer, unp_gcing; 964 extern struct domain unixdomain; 965 966 void 967 unp_gc() 968 { 969 register struct file *fp, *nextfp; 970 register struct socket *so; 971 struct file **extra_ref, **fpp; 972 int nunref, i; 973 974 if (unp_gcing) 975 return; 976 unp_gcing = 1; 977 unp_defer = 0; 978 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) 979 fp->f_flag &= ~(FMARK|FDEFER); 980 do { 981 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) { 982 if (fp->f_count == 0) 983 continue; 984 if (fp->f_flag & FDEFER) { 985 fp->f_flag &= ~FDEFER; 986 unp_defer--; 987 } else { 988 if (fp->f_flag & FMARK) 989 continue; 990 if (fp->f_count == fp->f_msgcount) 991 continue; 992 fp->f_flag |= FMARK; 993 } 994 if (fp->f_type != DTYPE_SOCKET || 995 (so = (struct socket *)fp->f_data) == 0) 996 continue; 997 if (so->so_proto->pr_domain != &unixdomain || 998 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 999 continue; 1000 #ifdef notdef 1001 if (so->so_rcv.sb_flags & SB_LOCK) { 1002 /* 1003 * This is problematical; it's not clear 1004 * we need to wait for the sockbuf to be 1005 * unlocked (on a uniprocessor, at least), 1006 * and it's also not clear what to do 1007 * if sbwait returns an error due to receipt 1008 * of a signal. If sbwait does return 1009 * an error, we'll go into an infinite 1010 * loop. Delete all of this for now. 1011 */ 1012 (void) sbwait(&so->so_rcv); 1013 goto restart; 1014 } 1015 #endif 1016 unp_scan(so->so_rcv.sb_mb, unp_mark); 1017 } 1018 } while (unp_defer); 1019 /* 1020 * We grab an extra reference to each of the file table entries 1021 * that are not otherwise accessible and then free the rights 1022 * that are stored in messages on them. 1023 * 1024 * The bug in the orginal code is a little tricky, so I'll describe 1025 * what's wrong with it here. 1026 * 1027 * It is incorrect to simply unp_discard each entry for f_msgcount 1028 * times -- consider the case of sockets A and B that contain 1029 * references to each other. On a last close of some other socket, 1030 * we trigger a gc since the number of outstanding rights (unp_rights) 1031 * is non-zero. If during the sweep phase the gc code un_discards, 1032 * we end up doing a (full) closef on the descriptor. A closef on A 1033 * results in the following chain. Closef calls soo_close, which 1034 * calls soclose. Soclose calls first (through the switch 1035 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1036 * returns because the previous instance had set unp_gcing, and 1037 * we return all the way back to soclose, which marks the socket 1038 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1039 * to free up the rights that are queued in messages on the socket A, 1040 * i.e., the reference on B. The sorflush calls via the dom_dispose 1041 * switch unp_dispose, which unp_scans with unp_discard. This second 1042 * instance of unp_discard just calls closef on B. 1043 * 1044 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1045 * which results in another closef on A. Unfortunately, A is already 1046 * being closed, and the descriptor has already been marked with 1047 * SS_NOFDREF, and soclose panics at this point. 1048 * 1049 * Here, we first take an extra reference to each inaccessible 1050 * descriptor. Then, we call sorflush ourself, since we know 1051 * it is a Unix domain socket anyhow. After we destroy all the 1052 * rights carried in messages, we do a last closef to get rid 1053 * of our extra reference. This is the last close, and the 1054 * unp_detach etc will shut down the socket. 1055 * 1056 * 91/09/19, bsy@cs.cmu.edu 1057 */ 1058 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); 1059 for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0; 1060 fp = nextfp) { 1061 nextfp = fp->f_list.le_next; 1062 if (fp->f_count == 0) 1063 continue; 1064 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { 1065 *fpp++ = fp; 1066 nunref++; 1067 fp->f_count++; 1068 } 1069 } 1070 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 1071 sorflush((struct socket *)(*fpp)->f_data); 1072 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 1073 (void) closef(*fpp, (struct proc *)0); 1074 free((caddr_t)extra_ref, M_FILE); 1075 unp_gcing = 0; 1076 } 1077 1078 void 1079 unp_dispose(m) 1080 struct mbuf *m; 1081 { 1082 1083 if (m) 1084 unp_scan(m, unp_discard); 1085 } 1086 1087 void 1088 unp_scan(m0, op) 1089 register struct mbuf *m0; 1090 void (*op) __P((struct file *)); 1091 { 1092 register struct mbuf *m; 1093 register struct file **rp; 1094 register struct cmsghdr *cm; 1095 register int i; 1096 int qfds; 1097 1098 while (m0) { 1099 for (m = m0; m; m = m->m_next) 1100 if (m->m_type == MT_CONTROL && 1101 m->m_len >= sizeof(*cm)) { 1102 cm = mtod(m, struct cmsghdr *); 1103 if (cm->cmsg_level != SOL_SOCKET || 1104 cm->cmsg_type != SCM_RIGHTS) 1105 continue; 1106 qfds = (cm->cmsg_len - sizeof *cm) 1107 / sizeof (struct file *); 1108 rp = (struct file **)(cm + 1); 1109 for (i = 0; i < qfds; i++) 1110 (*op)(*rp++); 1111 break; /* XXX, but saves time */ 1112 } 1113 m0 = m0->m_act; 1114 } 1115 } 1116 1117 void 1118 unp_mark(fp) 1119 struct file *fp; 1120 { 1121 1122 if (fp->f_flag & FMARK) 1123 return; 1124 unp_defer++; 1125 fp->f_flag |= (FMARK|FDEFER); 1126 } 1127 1128 void 1129 unp_discard(fp) 1130 struct file *fp; 1131 { 1132 1133 fp->f_msgcount--; 1134 unp_rights--; 1135 (void) closef(fp, (struct proc *)0); 1136 } 1137