1 /* $NetBSD: uipc_usrreq.c,v 1.48 2000/06/05 16:29:45 thorpej Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1997 Christopher G. Demetriou. All rights reserved. 42 * Copyright (c) 1982, 1986, 1989, 1991, 1993 43 * The Regents of the University of California. All rights reserved. 44 * 45 * Redistribution and use in source and binary forms, with or without 46 * modification, are permitted provided that the following conditions 47 * are met: 48 * 1. Redistributions of source code must retain the above copyright 49 * notice, this list of conditions and the following disclaimer. 50 * 2. Redistributions in binary form must reproduce the above copyright 51 * notice, this list of conditions and the following disclaimer in the 52 * documentation and/or other materials provided with the distribution. 53 * 3. All advertising materials mentioning features or use of this software 54 * must display the following acknowledgement: 55 * This product includes software developed by the University of 56 * California, Berkeley and its contributors. 57 * 4. Neither the name of the University nor the names of its contributors 58 * may be used to endorse or promote products derived from this software 59 * without specific prior written permission. 60 * 61 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 62 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 64 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 66 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 67 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 69 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 70 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 71 * SUCH DAMAGE. 72 * 73 * @(#)uipc_usrreq.c 8.9 (Berkeley) 5/14/95 74 */ 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/proc.h> 79 #include <sys/filedesc.h> 80 #include <sys/domain.h> 81 #include <sys/protosw.h> 82 #include <sys/socket.h> 83 #include <sys/socketvar.h> 84 #include <sys/unpcb.h> 85 #include <sys/un.h> 86 #include <sys/namei.h> 87 #include <sys/vnode.h> 88 #include <sys/file.h> 89 #include <sys/stat.h> 90 #include <sys/mbuf.h> 91 92 /* 93 * Unix communications domain. 94 * 95 * TODO: 96 * SEQPACKET, RDM 97 * rethink name space problems 98 * need a proper out-of-band 99 */ 100 struct sockaddr_un sun_noname = { sizeof(sun_noname), AF_LOCAL }; 101 ino_t unp_ino; /* prototype for fake inode numbers */ 102 103 struct mbuf *unp_addsockcred __P((struct proc *, struct mbuf *)); 104 105 int 106 unp_output(m, control, unp, p) 107 struct mbuf *m, *control; 108 struct unpcb *unp; 109 struct proc *p; 110 { 111 struct socket *so2; 112 struct sockaddr_un *sun; 113 114 so2 = unp->unp_conn->unp_socket; 115 if (unp->unp_addr) 116 sun = unp->unp_addr; 117 else 118 sun = &sun_noname; 119 if (unp->unp_conn->unp_flags & UNP_WANTCRED) 120 control = unp_addsockcred(p, control); 121 if (sbappendaddr(&so2->so_rcv, (struct sockaddr *)sun, m, 122 control) == 0) { 123 m_freem(control); 124 m_freem(m); 125 return (EINVAL); 126 } else { 127 sorwakeup(so2); 128 return (0); 129 } 130 } 131 132 void 133 unp_setsockaddr(unp, nam) 134 struct unpcb *unp; 135 struct mbuf *nam; 136 { 137 struct sockaddr_un *sun; 138 139 if (unp->unp_addr) 140 sun = unp->unp_addr; 141 else 142 sun = &sun_noname; 143 nam->m_len = sun->sun_len; 144 if (nam->m_len > MLEN) 145 MEXTMALLOC(nam, nam->m_len, M_WAITOK); 146 memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len); 147 } 148 149 void 150 unp_setpeeraddr(unp, nam) 151 struct unpcb *unp; 152 struct mbuf *nam; 153 { 154 struct sockaddr_un *sun; 155 156 if (unp->unp_conn && unp->unp_conn->unp_addr) 157 sun = unp->unp_conn->unp_addr; 158 else 159 sun = &sun_noname; 160 nam->m_len = sun->sun_len; 161 if (nam->m_len > MLEN) 162 MEXTMALLOC(nam, nam->m_len, M_WAITOK); 163 memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len); 164 } 165 166 /*ARGSUSED*/ 167 int 168 uipc_usrreq(so, req, m, nam, control, p) 169 struct socket *so; 170 int req; 171 struct mbuf *m, *nam, *control; 172 struct proc *p; 173 { 174 struct unpcb *unp = sotounpcb(so); 175 struct socket *so2; 176 int error = 0; 177 178 if (req == PRU_CONTROL) 179 return (EOPNOTSUPP); 180 181 #ifdef DIAGNOSTIC 182 if (req != PRU_SEND && req != PRU_SENDOOB && control) 183 panic("uipc_usrreq: unexpected control mbuf"); 184 #endif 185 if (unp == 0 && req != PRU_ATTACH) { 186 error = EINVAL; 187 goto release; 188 } 189 190 switch (req) { 191 192 case PRU_ATTACH: 193 if (unp != 0) { 194 error = EISCONN; 195 break; 196 } 197 error = unp_attach(so); 198 break; 199 200 case PRU_DETACH: 201 unp_detach(unp); 202 break; 203 204 case PRU_BIND: 205 error = unp_bind(unp, nam, p); 206 break; 207 208 case PRU_LISTEN: 209 if (unp->unp_vnode == 0) 210 error = EINVAL; 211 break; 212 213 case PRU_CONNECT: 214 error = unp_connect(so, nam, p); 215 break; 216 217 case PRU_CONNECT2: 218 error = unp_connect2(so, (struct socket *)nam); 219 break; 220 221 case PRU_DISCONNECT: 222 unp_disconnect(unp); 223 break; 224 225 case PRU_ACCEPT: 226 unp_setpeeraddr(unp, nam); 227 break; 228 229 case PRU_SHUTDOWN: 230 socantsendmore(so); 231 unp_shutdown(unp); 232 break; 233 234 case PRU_RCVD: 235 switch (so->so_type) { 236 237 case SOCK_DGRAM: 238 panic("uipc 1"); 239 /*NOTREACHED*/ 240 241 case SOCK_STREAM: 242 #define rcv (&so->so_rcv) 243 #define snd (&so2->so_snd) 244 if (unp->unp_conn == 0) 245 break; 246 so2 = unp->unp_conn->unp_socket; 247 /* 248 * Adjust backpressure on sender 249 * and wakeup any waiting to write. 250 */ 251 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; 252 unp->unp_mbcnt = rcv->sb_mbcnt; 253 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; 254 unp->unp_cc = rcv->sb_cc; 255 sowwakeup(so2); 256 #undef snd 257 #undef rcv 258 break; 259 260 default: 261 panic("uipc 2"); 262 } 263 break; 264 265 case PRU_SEND: 266 /* 267 * Note: unp_internalize() rejects any control message 268 * other than SCM_RIGHTS, and only allows one. This 269 * has the side-effect of preventing a caller from 270 * forging SCM_CREDS. 271 */ 272 if (control && (error = unp_internalize(control, p))) 273 break; 274 switch (so->so_type) { 275 276 case SOCK_DGRAM: { 277 if (nam) { 278 if ((so->so_state & SS_ISCONNECTED) != 0) { 279 error = EISCONN; 280 goto die; 281 } 282 error = unp_connect(so, nam, p); 283 if (error) { 284 die: 285 m_freem(control); 286 m_freem(m); 287 break; 288 } 289 } else { 290 if ((so->so_state & SS_ISCONNECTED) == 0) { 291 error = ENOTCONN; 292 goto die; 293 } 294 } 295 error = unp_output(m, control, unp, p); 296 if (nam) 297 unp_disconnect(unp); 298 break; 299 } 300 301 case SOCK_STREAM: 302 #define rcv (&so2->so_rcv) 303 #define snd (&so->so_snd) 304 if (unp->unp_conn == 0) 305 panic("uipc 3"); 306 so2 = unp->unp_conn->unp_socket; 307 if (unp->unp_conn->unp_flags & UNP_WANTCRED) { 308 /* 309 * Credentials are passed only once on 310 * SOCK_STREAM. 311 */ 312 unp->unp_conn->unp_flags &= ~UNP_WANTCRED; 313 control = unp_addsockcred(p, control); 314 } 315 /* 316 * Send to paired receive port, and then reduce 317 * send buffer hiwater marks to maintain backpressure. 318 * Wake up readers. 319 */ 320 if (control) { 321 if (sbappendcontrol(rcv, m, control) == 0) 322 m_freem(control); 323 } else 324 sbappend(rcv, m); 325 snd->sb_mbmax -= 326 rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; 327 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; 328 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; 329 unp->unp_conn->unp_cc = rcv->sb_cc; 330 sorwakeup(so2); 331 #undef snd 332 #undef rcv 333 break; 334 335 default: 336 panic("uipc 4"); 337 } 338 break; 339 340 case PRU_ABORT: 341 unp_drop(unp, ECONNABORTED); 342 343 #ifdef DIAGNOSTIC 344 if (so->so_pcb == 0) 345 panic("uipc 5: drop killed pcb"); 346 #endif 347 unp_detach(unp); 348 break; 349 350 case PRU_SENSE: 351 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 352 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { 353 so2 = unp->unp_conn->unp_socket; 354 ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc; 355 } 356 ((struct stat *) m)->st_dev = NODEV; 357 if (unp->unp_ino == 0) 358 unp->unp_ino = unp_ino++; 359 ((struct stat *) m)->st_atimespec = 360 ((struct stat *) m)->st_mtimespec = 361 ((struct stat *) m)->st_ctimespec = unp->unp_ctime; 362 ((struct stat *) m)->st_ino = unp->unp_ino; 363 return (0); 364 365 case PRU_RCVOOB: 366 error = EOPNOTSUPP; 367 break; 368 369 case PRU_SENDOOB: 370 m_freem(control); 371 m_freem(m); 372 error = EOPNOTSUPP; 373 break; 374 375 case PRU_SOCKADDR: 376 unp_setsockaddr(unp, nam); 377 break; 378 379 case PRU_PEERADDR: 380 unp_setpeeraddr(unp, nam); 381 break; 382 383 default: 384 panic("piusrreq"); 385 } 386 387 release: 388 return (error); 389 } 390 391 /* 392 * Unix domain socket option processing. 393 */ 394 int 395 uipc_ctloutput(op, so, level, optname, mp) 396 int op; 397 struct socket *so; 398 int level, optname; 399 struct mbuf **mp; 400 { 401 struct unpcb *unp = sotounpcb(so); 402 struct mbuf *m = *mp; 403 int optval = 0, error = 0; 404 405 if (level != 0) { 406 error = EINVAL; 407 if (op == PRCO_SETOPT && m) 408 (void) m_free(m); 409 } else switch (op) { 410 411 case PRCO_SETOPT: 412 switch (optname) { 413 case LOCAL_CREDS: 414 if (m == NULL || m->m_len != sizeof(int)) 415 error = EINVAL; 416 else { 417 optval = *mtod(m, int *); 418 switch (optname) { 419 #define OPTSET(bit) \ 420 if (optval) \ 421 unp->unp_flags |= (bit); \ 422 else \ 423 unp->unp_flags &= ~(bit); 424 425 case LOCAL_CREDS: 426 OPTSET(UNP_WANTCRED); 427 break; 428 } 429 } 430 break; 431 #undef OPTSET 432 433 default: 434 error = ENOPROTOOPT; 435 break; 436 } 437 if (m) 438 (void) m_free(m); 439 break; 440 441 case PRCO_GETOPT: 442 switch (optname) { 443 case LOCAL_CREDS: 444 *mp = m = m_get(M_WAIT, MT_SOOPTS); 445 m->m_len = sizeof(int); 446 switch (optname) { 447 448 #define OPTBIT(bit) (unp->unp_flags & (bit) ? 1 : 0) 449 450 case LOCAL_CREDS: 451 optval = OPTBIT(UNP_WANTCRED); 452 break; 453 } 454 *mtod(m, int *) = optval; 455 break; 456 #undef OPTBIT 457 458 default: 459 error = ENOPROTOOPT; 460 break; 461 } 462 break; 463 } 464 return (error); 465 } 466 467 /* 468 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 469 * for stream sockets, although the total for sender and receiver is 470 * actually only PIPSIZ. 471 * Datagram sockets really use the sendspace as the maximum datagram size, 472 * and don't really want to reserve the sendspace. Their recvspace should 473 * be large enough for at least one max-size datagram plus address. 474 */ 475 #define PIPSIZ 4096 476 u_long unpst_sendspace = PIPSIZ; 477 u_long unpst_recvspace = PIPSIZ; 478 u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 479 u_long unpdg_recvspace = 4*1024; 480 481 int unp_rights; /* file descriptors in flight */ 482 483 int 484 unp_attach(so) 485 struct socket *so; 486 { 487 struct unpcb *unp; 488 struct timeval tv; 489 int error; 490 491 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 492 switch (so->so_type) { 493 494 case SOCK_STREAM: 495 error = soreserve(so, unpst_sendspace, unpst_recvspace); 496 break; 497 498 case SOCK_DGRAM: 499 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 500 break; 501 502 default: 503 panic("unp_attach"); 504 } 505 if (error) 506 return (error); 507 } 508 unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT); 509 if (unp == NULL) 510 return (ENOBUFS); 511 memset((caddr_t)unp, 0, sizeof(*unp)); 512 unp->unp_socket = so; 513 so->so_pcb = unp; 514 microtime(&tv); 515 TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime); 516 return (0); 517 } 518 519 void 520 unp_detach(unp) 521 struct unpcb *unp; 522 { 523 524 if (unp->unp_vnode) { 525 unp->unp_vnode->v_socket = 0; 526 vrele(unp->unp_vnode); 527 unp->unp_vnode = 0; 528 } 529 if (unp->unp_conn) 530 unp_disconnect(unp); 531 while (unp->unp_refs) 532 unp_drop(unp->unp_refs, ECONNRESET); 533 soisdisconnected(unp->unp_socket); 534 unp->unp_socket->so_pcb = 0; 535 if (unp->unp_addr) 536 free(unp->unp_addr, M_SONAME); 537 if (unp_rights) { 538 /* 539 * Normally the receive buffer is flushed later, 540 * in sofree, but if our receive buffer holds references 541 * to descriptors that are now garbage, we will dispose 542 * of those descriptor references after the garbage collector 543 * gets them (resulting in a "panic: closef: count < 0"). 544 */ 545 sorflush(unp->unp_socket); 546 free(unp, M_PCB); 547 unp_gc(); 548 } else 549 free(unp, M_PCB); 550 } 551 552 int 553 unp_bind(unp, nam, p) 554 struct unpcb *unp; 555 struct mbuf *nam; 556 struct proc *p; 557 { 558 struct sockaddr_un *sun; 559 struct vnode *vp; 560 struct vattr vattr; 561 size_t addrlen; 562 int error; 563 struct nameidata nd; 564 565 if (unp->unp_vnode != 0) 566 return (EINVAL); 567 568 /* 569 * Allocate the new sockaddr. We have to allocate one 570 * extra byte so that we can ensure that the pathname 571 * is nul-terminated. 572 */ 573 addrlen = nam->m_len + 1; 574 sun = malloc(addrlen, M_SONAME, M_WAITOK); 575 m_copydata(nam, 0, nam->m_len, (caddr_t)sun); 576 *(((char *)sun) + nam->m_len) = '\0'; 577 578 NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE, 579 sun->sun_path, p); 580 581 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 582 if ((error = namei(&nd)) != 0) 583 goto bad; 584 vp = nd.ni_vp; 585 if (vp != NULL) { 586 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 587 if (nd.ni_dvp == vp) 588 vrele(nd.ni_dvp); 589 else 590 vput(nd.ni_dvp); 591 vrele(vp); 592 error = EADDRINUSE; 593 goto bad; 594 } 595 VATTR_NULL(&vattr); 596 vattr.va_type = VSOCK; 597 vattr.va_mode = ACCESSPERMS; 598 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); 599 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 600 if (error) 601 goto bad; 602 vp = nd.ni_vp; 603 vp->v_socket = unp->unp_socket; 604 unp->unp_vnode = vp; 605 unp->unp_addrlen = addrlen; 606 unp->unp_addr = sun; 607 VOP_UNLOCK(vp, 0); 608 return (0); 609 610 bad: 611 free(sun, M_SONAME); 612 return (error); 613 } 614 615 int 616 unp_connect(so, nam, p) 617 struct socket *so; 618 struct mbuf *nam; 619 struct proc *p; 620 { 621 struct sockaddr_un *sun; 622 struct vnode *vp; 623 struct socket *so2, *so3; 624 struct unpcb *unp2, *unp3; 625 size_t addrlen; 626 int error; 627 struct nameidata nd; 628 629 /* 630 * Allocate a temporary sockaddr. We have to allocate one extra 631 * byte so that we can ensure that the pathname is nul-terminated. 632 * When we establish the connection, we copy the other PCB's 633 * sockaddr to our own. 634 */ 635 addrlen = nam->m_len + 1; 636 sun = malloc(addrlen, M_SONAME, M_WAITOK); 637 m_copydata(nam, 0, nam->m_len, (caddr_t)sun); 638 *(((char *)sun) + nam->m_len) = '\0'; 639 640 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, sun->sun_path, p); 641 642 if ((error = namei(&nd)) != 0) 643 goto bad2; 644 vp = nd.ni_vp; 645 if (vp->v_type != VSOCK) { 646 error = ENOTSOCK; 647 goto bad; 648 } 649 if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0) 650 goto bad; 651 so2 = vp->v_socket; 652 if (so2 == 0) { 653 error = ECONNREFUSED; 654 goto bad; 655 } 656 if (so->so_type != so2->so_type) { 657 error = EPROTOTYPE; 658 goto bad; 659 } 660 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 661 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 662 (so3 = sonewconn(so2, 0)) == 0) { 663 error = ECONNREFUSED; 664 goto bad; 665 } 666 unp2 = sotounpcb(so2); 667 unp3 = sotounpcb(so3); 668 if (unp2->unp_addr) { 669 unp3->unp_addr = malloc(unp2->unp_addrlen, 670 M_SONAME, M_WAITOK); 671 memcpy(unp3->unp_addr, unp2->unp_addr, 672 unp2->unp_addrlen); 673 unp3->unp_addrlen = unp2->unp_addrlen; 674 } 675 unp3->unp_flags = unp2->unp_flags; 676 so2 = so3; 677 } 678 error = unp_connect2(so, so2); 679 bad: 680 vput(vp); 681 bad2: 682 free(sun, M_SONAME); 683 return (error); 684 } 685 686 int 687 unp_connect2(so, so2) 688 struct socket *so; 689 struct socket *so2; 690 { 691 struct unpcb *unp = sotounpcb(so); 692 struct unpcb *unp2; 693 694 if (so2->so_type != so->so_type) 695 return (EPROTOTYPE); 696 unp2 = sotounpcb(so2); 697 unp->unp_conn = unp2; 698 switch (so->so_type) { 699 700 case SOCK_DGRAM: 701 unp->unp_nextref = unp2->unp_refs; 702 unp2->unp_refs = unp; 703 soisconnected(so); 704 break; 705 706 case SOCK_STREAM: 707 unp2->unp_conn = unp; 708 soisconnected(so); 709 soisconnected(so2); 710 break; 711 712 default: 713 panic("unp_connect2"); 714 } 715 return (0); 716 } 717 718 void 719 unp_disconnect(unp) 720 struct unpcb *unp; 721 { 722 struct unpcb *unp2 = unp->unp_conn; 723 724 if (unp2 == 0) 725 return; 726 unp->unp_conn = 0; 727 switch (unp->unp_socket->so_type) { 728 729 case SOCK_DGRAM: 730 if (unp2->unp_refs == unp) 731 unp2->unp_refs = unp->unp_nextref; 732 else { 733 unp2 = unp2->unp_refs; 734 for (;;) { 735 if (unp2 == 0) 736 panic("unp_disconnect"); 737 if (unp2->unp_nextref == unp) 738 break; 739 unp2 = unp2->unp_nextref; 740 } 741 unp2->unp_nextref = unp->unp_nextref; 742 } 743 unp->unp_nextref = 0; 744 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 745 break; 746 747 case SOCK_STREAM: 748 soisdisconnected(unp->unp_socket); 749 unp2->unp_conn = 0; 750 soisdisconnected(unp2->unp_socket); 751 break; 752 } 753 } 754 755 #ifdef notdef 756 unp_abort(unp) 757 struct unpcb *unp; 758 { 759 760 unp_detach(unp); 761 } 762 #endif 763 764 void 765 unp_shutdown(unp) 766 struct unpcb *unp; 767 { 768 struct socket *so; 769 770 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 771 (so = unp->unp_conn->unp_socket)) 772 socantrcvmore(so); 773 } 774 775 void 776 unp_drop(unp, errno) 777 struct unpcb *unp; 778 int errno; 779 { 780 struct socket *so = unp->unp_socket; 781 782 so->so_error = errno; 783 unp_disconnect(unp); 784 if (so->so_head) { 785 so->so_pcb = 0; 786 sofree(so); 787 if (unp->unp_addr) 788 free(unp->unp_addr, M_SONAME); 789 free(unp, M_PCB); 790 } 791 } 792 793 #ifdef notdef 794 unp_drain() 795 { 796 797 } 798 #endif 799 800 int 801 unp_externalize(rights) 802 struct mbuf *rights; 803 { 804 struct proc *p = curproc; /* XXX */ 805 struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 806 int i, *fdp; 807 struct file **rp; 808 struct file *fp; 809 int nfds, f, error = 0; 810 811 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / 812 sizeof(struct file *); 813 fdp = (int *)CMSG_DATA(cm); 814 rp = (struct file **)CMSG_DATA(cm); 815 816 /* Make sure the recipient should be able to see the descriptors.. */ 817 if (p->p_cwdi->cwdi_rdir != NULL) { 818 rp = (struct file **)CMSG_DATA(cm); 819 for (i = 0; i < nfds; i++) { 820 fp = *rp++; 821 /* 822 * If we are in a chroot'ed directory, and 823 * someone wants to pass us a directory, make 824 * sure it's inside the subtree we're allowed 825 * to access. 826 */ 827 if (fp->f_type == DTYPE_VNODE) { 828 struct vnode *vp = (struct vnode *)fp->f_data; 829 if ((vp->v_type == VDIR) && 830 !vn_isunder(vp, p->p_cwdi->cwdi_rdir, p)) { 831 error = EPERM; 832 break; 833 } 834 } 835 } 836 } 837 rp = (struct file **)CMSG_DATA(cm); 838 839 /* Make sure that the recipient has space */ 840 if (error || (!fdavail(p, nfds))) { 841 for (i = 0; i < nfds; i++) { 842 fp = *rp; 843 /* 844 * zero the pointer before calling unp_discard, 845 * since it may end up in unp_gc().. 846 */ 847 *rp++ = 0; 848 unp_discard(fp); 849 } 850 return (error ? error : EMSGSIZE); 851 } 852 853 /* 854 * Add file to the recipient's open file table, converting them 855 * to integer file descriptors as we go. Done in forward order 856 * because an integer will always come in the same place or before 857 * its corresponding struct file pointer. 858 */ 859 for (i = 0; i < nfds; i++) { 860 fp = *rp++; 861 fp->f_msgcount--; 862 unp_rights--; 863 864 if (fdalloc(p, 0, &f)) 865 panic("unp_externalize"); 866 p->p_fd->fd_ofiles[f] = fp; 867 *fdp++ = f; 868 } 869 870 /* 871 * Adjust length, in case of transition from large struct file 872 * pointers to ints. 873 */ 874 cm->cmsg_len = CMSG_LEN(nfds * sizeof(int)); 875 rights->m_len = CMSG_SPACE(nfds * sizeof(int)); 876 return (0); 877 } 878 879 int 880 unp_internalize(control, p) 881 struct mbuf *control; 882 struct proc *p; 883 { 884 struct filedesc *fdescp = p->p_fd; 885 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 886 struct file **rp; 887 struct file *fp; 888 int i, fd, *fdp; 889 int nfds; 890 u_int neededspace; 891 892 /* Sanity check the control message header */ 893 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || 894 cm->cmsg_len != control->m_len) 895 return (EINVAL); 896 897 /* Verify that the file descriptors are valid */ 898 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int); 899 fdp = (int *)CMSG_DATA(cm); 900 for (i = 0; i < nfds; i++) { 901 fd = *fdp++; 902 if ((unsigned)fd >= fdescp->fd_nfiles || 903 fdescp->fd_ofiles[fd] == NULL || 904 (fdescp->fd_ofiles[fd]->f_iflags & FIF_WANTCLOSE) != 0) 905 return (EBADF); 906 } 907 908 /* Make sure we have room for the struct file pointers */ 909 morespace: 910 neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) - 911 control->m_len; 912 if (neededspace > M_TRAILINGSPACE(control)) { 913 914 /* if we already have a cluster, the message is just too big */ 915 if (control->m_flags & M_EXT) 916 return (E2BIG); 917 918 /* allocate a cluster and try again */ 919 MCLGET(control, M_WAIT); 920 if ((control->m_flags & M_EXT) == 0) 921 return (ENOBUFS); /* allocation failed */ 922 923 /* copy the data to the cluster */ 924 memcpy(mtod(control, char *), cm, cm->cmsg_len); 925 cm = mtod(control, struct cmsghdr *); 926 goto morespace; 927 } 928 929 /* adjust message & mbuf to note amount of space actually used. */ 930 cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *)); 931 control->m_len = CMSG_SPACE(nfds * sizeof(struct file *)); 932 933 /* 934 * Transform the file descriptors into struct file pointers, in 935 * reverse order so that if pointers are bigger than ints, the 936 * int won't get until we're done. 937 */ 938 fdp = ((int *)CMSG_DATA(cm)) + nfds - 1; 939 rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1; 940 for (i = 0; i < nfds; i++) { 941 fp = fdescp->fd_ofiles[*fdp--]; 942 FILE_USE(fp); 943 *rp-- = fp; 944 fp->f_count++; 945 fp->f_msgcount++; 946 FILE_UNUSE(fp, NULL); 947 unp_rights++; 948 } 949 return (0); 950 } 951 952 struct mbuf * 953 unp_addsockcred(p, control) 954 struct proc *p; 955 struct mbuf *control; 956 { 957 struct cmsghdr *cmp; 958 struct sockcred *sc; 959 struct mbuf *m, *n; 960 int len, space, i; 961 962 len = CMSG_LEN(SOCKCREDSIZE(p->p_ucred->cr_ngroups)); 963 space = CMSG_SPACE(SOCKCREDSIZE(p->p_ucred->cr_ngroups)); 964 965 m = m_get(M_WAIT, MT_CONTROL); 966 if (space > MLEN) { 967 if (space > MCLBYTES) 968 MEXTMALLOC(m, space, M_WAITOK); 969 else 970 MCLGET(m, M_WAIT); 971 if ((m->m_flags & M_EXT) == 0) { 972 m_free(m); 973 return (control); 974 } 975 } 976 977 m->m_len = space; 978 m->m_next = NULL; 979 cmp = mtod(m, struct cmsghdr *); 980 sc = (struct sockcred *)CMSG_DATA(cmp); 981 cmp->cmsg_len = len; 982 cmp->cmsg_level = SOL_SOCKET; 983 cmp->cmsg_type = SCM_CREDS; 984 sc->sc_uid = p->p_cred->p_ruid; 985 sc->sc_euid = p->p_ucred->cr_uid; 986 sc->sc_gid = p->p_cred->p_rgid; 987 sc->sc_egid = p->p_ucred->cr_gid; 988 sc->sc_ngroups = p->p_ucred->cr_ngroups; 989 for (i = 0; i < sc->sc_ngroups; i++) 990 sc->sc_groups[i] = p->p_ucred->cr_groups[i]; 991 992 /* 993 * If a control message already exists, append us to the end. 994 */ 995 if (control != NULL) { 996 for (n = control; n->m_next != NULL; n = n->m_next) 997 ; 998 n->m_next = m; 999 } else 1000 control = m; 1001 1002 return (control); 1003 } 1004 1005 int unp_defer, unp_gcing; 1006 extern struct domain unixdomain; 1007 1008 /* 1009 * Comment added long after the fact explaining what's going on here. 1010 * Do a mark-sweep GC of file descriptors on the system, to free up 1011 * any which are caught in flight to an about-to-be-closed socket. 1012 * 1013 * Traditional mark-sweep gc's start at the "root", and mark 1014 * everything reachable from the root (which, in our case would be the 1015 * process table). The mark bits are cleared during the sweep. 1016 * 1017 * XXX For some inexplicable reason (perhaps because the file 1018 * descriptor tables used to live in the u area which could be swapped 1019 * out and thus hard to reach), we do multiple scans over the set of 1020 * descriptors, using use *two* mark bits per object (DEFER and MARK). 1021 * Whenever we find a descriptor which references other descriptors, 1022 * the ones it references are marked with both bits, and we iterate 1023 * over the whole file table until there are no more DEFER bits set. 1024 * We also make an extra pass *before* the GC to clear the mark bits, 1025 * which could have been cleared at almost no cost during the previous 1026 * sweep. 1027 * 1028 * XXX MP: this needs to run with locks such that no other thread of 1029 * control can create or destroy references to file descriptors. it 1030 * may be necessary to defer the GC until later (when the locking 1031 * situation is more hospitable); it may be necessary to push this 1032 * into a separate thread. 1033 */ 1034 void 1035 unp_gc() 1036 { 1037 struct file *fp, *nextfp; 1038 struct socket *so, *so1; 1039 struct file **extra_ref, **fpp; 1040 int nunref, i; 1041 1042 if (unp_gcing) 1043 return; 1044 unp_gcing = 1; 1045 unp_defer = 0; 1046 1047 /* Clear mark bits */ 1048 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) 1049 fp->f_flag &= ~(FMARK|FDEFER); 1050 1051 /* 1052 * Iterate over the set of descriptors, marking ones believed 1053 * (based on refcount) to be referenced from a process, and 1054 * marking for rescan descriptors which are queued on a socket. 1055 */ 1056 do { 1057 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) { 1058 if (fp->f_flag & FDEFER) { 1059 fp->f_flag &= ~FDEFER; 1060 unp_defer--; 1061 #ifdef DIAGNOSTIC 1062 if (fp->f_count == 0) 1063 panic("unp_gc: deferred unreferenced socket"); 1064 #endif 1065 } else { 1066 if (fp->f_count == 0) 1067 continue; 1068 if (fp->f_flag & FMARK) 1069 continue; 1070 if (fp->f_count == fp->f_msgcount) 1071 continue; 1072 } 1073 fp->f_flag |= FMARK; 1074 1075 if (fp->f_type != DTYPE_SOCKET || 1076 (so = (struct socket *)fp->f_data) == 0) 1077 continue; 1078 if (so->so_proto->pr_domain != &unixdomain || 1079 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 1080 continue; 1081 #ifdef notdef 1082 if (so->so_rcv.sb_flags & SB_LOCK) { 1083 /* 1084 * This is problematical; it's not clear 1085 * we need to wait for the sockbuf to be 1086 * unlocked (on a uniprocessor, at least), 1087 * and it's also not clear what to do 1088 * if sbwait returns an error due to receipt 1089 * of a signal. If sbwait does return 1090 * an error, we'll go into an infinite 1091 * loop. Delete all of this for now. 1092 */ 1093 (void) sbwait(&so->so_rcv); 1094 goto restart; 1095 } 1096 #endif 1097 unp_scan(so->so_rcv.sb_mb, unp_mark, 0); 1098 /* 1099 * mark descriptors referenced from sockets queued on the accept queue as well. 1100 */ 1101 if (so->so_options & SO_ACCEPTCONN) { 1102 for (so1 = so->so_q0.tqh_first; 1103 so1 != 0; 1104 so1 = so1->so_qe.tqe_next) { 1105 unp_scan(so1->so_rcv.sb_mb, unp_mark, 0); 1106 } 1107 for (so1 = so->so_q.tqh_first; 1108 so1 != 0; 1109 so1 = so1->so_qe.tqe_next) { 1110 unp_scan(so1->so_rcv.sb_mb, unp_mark, 0); 1111 } 1112 } 1113 1114 } 1115 } while (unp_defer); 1116 /* 1117 * Sweep pass. Find unmarked descriptors, and free them. 1118 * 1119 * We grab an extra reference to each of the file table entries 1120 * that are not otherwise accessible and then free the rights 1121 * that are stored in messages on them. 1122 * 1123 * The bug in the orginal code is a little tricky, so I'll describe 1124 * what's wrong with it here. 1125 * 1126 * It is incorrect to simply unp_discard each entry for f_msgcount 1127 * times -- consider the case of sockets A and B that contain 1128 * references to each other. On a last close of some other socket, 1129 * we trigger a gc since the number of outstanding rights (unp_rights) 1130 * is non-zero. If during the sweep phase the gc code un_discards, 1131 * we end up doing a (full) closef on the descriptor. A closef on A 1132 * results in the following chain. Closef calls soo_close, which 1133 * calls soclose. Soclose calls first (through the switch 1134 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1135 * returns because the previous instance had set unp_gcing, and 1136 * we return all the way back to soclose, which marks the socket 1137 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1138 * to free up the rights that are queued in messages on the socket A, 1139 * i.e., the reference on B. The sorflush calls via the dom_dispose 1140 * switch unp_dispose, which unp_scans with unp_discard. This second 1141 * instance of unp_discard just calls closef on B. 1142 * 1143 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1144 * which results in another closef on A. Unfortunately, A is already 1145 * being closed, and the descriptor has already been marked with 1146 * SS_NOFDREF, and soclose panics at this point. 1147 * 1148 * Here, we first take an extra reference to each inaccessible 1149 * descriptor. Then, if the inaccessible descriptor is a 1150 * socket, we call sorflush in case it is a Unix domain 1151 * socket. After we destroy all the rights carried in 1152 * messages, we do a last closef to get rid of our extra 1153 * reference. This is the last close, and the unp_detach etc 1154 * will shut down the socket. 1155 * 1156 * 91/09/19, bsy@cs.cmu.edu 1157 */ 1158 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); 1159 for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0; 1160 fp = nextfp) { 1161 nextfp = fp->f_list.le_next; 1162 if (fp->f_count == 0) 1163 continue; 1164 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { 1165 *fpp++ = fp; 1166 nunref++; 1167 fp->f_count++; 1168 } 1169 } 1170 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1171 fp = *fpp; 1172 FILE_USE(fp); 1173 if (fp->f_type == DTYPE_SOCKET) 1174 sorflush((struct socket *)fp->f_data); 1175 FILE_UNUSE(fp, NULL); 1176 } 1177 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1178 fp = *fpp; 1179 FILE_USE(fp); 1180 (void) closef(fp, (struct proc *)0); 1181 } 1182 free((caddr_t)extra_ref, M_FILE); 1183 unp_gcing = 0; 1184 } 1185 1186 void 1187 unp_dispose(m) 1188 struct mbuf *m; 1189 { 1190 1191 if (m) 1192 unp_scan(m, unp_discard, 1); 1193 } 1194 1195 void 1196 unp_scan(m0, op, discard) 1197 struct mbuf *m0; 1198 void (*op) __P((struct file *)); 1199 int discard; 1200 { 1201 struct mbuf *m; 1202 struct file **rp; 1203 struct cmsghdr *cm; 1204 int i; 1205 int qfds; 1206 1207 while (m0) { 1208 for (m = m0; m; m = m->m_next) { 1209 if (m->m_type == MT_CONTROL && 1210 m->m_len >= sizeof(*cm)) { 1211 cm = mtod(m, struct cmsghdr *); 1212 if (cm->cmsg_level != SOL_SOCKET || 1213 cm->cmsg_type != SCM_RIGHTS) 1214 continue; 1215 qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) 1216 / sizeof(struct file *); 1217 rp = (struct file **)CMSG_DATA(cm); 1218 for (i = 0; i < qfds; i++) { 1219 struct file *fp = *rp; 1220 if (discard) 1221 *rp = 0; 1222 (*op)(fp); 1223 rp++; 1224 } 1225 break; /* XXX, but saves time */ 1226 } 1227 } 1228 m0 = m0->m_act; 1229 } 1230 } 1231 1232 void 1233 unp_mark(fp) 1234 struct file *fp; 1235 { 1236 if (fp == NULL) 1237 return; 1238 1239 if (fp->f_flag & FMARK) 1240 return; 1241 1242 /* If we're already deferred, don't screw up the defer count */ 1243 if (fp->f_flag & FDEFER) 1244 return; 1245 1246 /* 1247 * Minimize the number of deferrals... Sockets are the only 1248 * type of descriptor which can hold references to another 1249 * descriptor, so just mark other descriptors, and defer 1250 * unmarked sockets for the next pass. 1251 */ 1252 if (fp->f_type == DTYPE_SOCKET) { 1253 unp_defer++; 1254 if (fp->f_count == 0) 1255 panic("unp_mark: queued unref"); 1256 fp->f_flag |= FDEFER; 1257 } else { 1258 fp->f_flag |= FMARK; 1259 } 1260 return; 1261 } 1262 1263 void 1264 unp_discard(fp) 1265 struct file *fp; 1266 { 1267 if (fp == NULL) 1268 return; 1269 FILE_USE(fp); 1270 fp->f_msgcount--; 1271 unp_rights--; 1272 (void) closef(fp, (struct proc *)0); 1273 } 1274