1 /* $NetBSD: uipc_usrreq.c,v 1.51 2001/06/14 20:32:47 thorpej Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1997 Christopher G. Demetriou. All rights reserved. 42 * Copyright (c) 1982, 1986, 1989, 1991, 1993 43 * The Regents of the University of California. All rights reserved. 44 * 45 * Redistribution and use in source and binary forms, with or without 46 * modification, are permitted provided that the following conditions 47 * are met: 48 * 1. Redistributions of source code must retain the above copyright 49 * notice, this list of conditions and the following disclaimer. 50 * 2. Redistributions in binary form must reproduce the above copyright 51 * notice, this list of conditions and the following disclaimer in the 52 * documentation and/or other materials provided with the distribution. 53 * 3. All advertising materials mentioning features or use of this software 54 * must display the following acknowledgement: 55 * This product includes software developed by the University of 56 * California, Berkeley and its contributors. 57 * 4. Neither the name of the University nor the names of its contributors 58 * may be used to endorse or promote products derived from this software 59 * without specific prior written permission. 60 * 61 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 62 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 64 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 66 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 67 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 69 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 70 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 71 * SUCH DAMAGE. 72 * 73 * @(#)uipc_usrreq.c 8.9 (Berkeley) 5/14/95 74 */ 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/proc.h> 79 #include <sys/filedesc.h> 80 #include <sys/domain.h> 81 #include <sys/protosw.h> 82 #include <sys/socket.h> 83 #include <sys/socketvar.h> 84 #include <sys/unpcb.h> 85 #include <sys/un.h> 86 #include <sys/namei.h> 87 #include <sys/vnode.h> 88 #include <sys/file.h> 89 #include <sys/stat.h> 90 #include <sys/mbuf.h> 91 92 /* 93 * Unix communications domain. 94 * 95 * TODO: 96 * SEQPACKET, RDM 97 * rethink name space problems 98 * need a proper out-of-band 99 */ 100 struct sockaddr_un sun_noname = { sizeof(sun_noname), AF_LOCAL }; 101 ino_t unp_ino; /* prototype for fake inode numbers */ 102 103 struct mbuf *unp_addsockcred __P((struct proc *, struct mbuf *)); 104 105 int 106 unp_output(m, control, unp, p) 107 struct mbuf *m, *control; 108 struct unpcb *unp; 109 struct proc *p; 110 { 111 struct socket *so2; 112 struct sockaddr_un *sun; 113 114 so2 = unp->unp_conn->unp_socket; 115 if (unp->unp_addr) 116 sun = unp->unp_addr; 117 else 118 sun = &sun_noname; 119 if (unp->unp_conn->unp_flags & UNP_WANTCRED) 120 control = unp_addsockcred(p, control); 121 if (sbappendaddr(&so2->so_rcv, (struct sockaddr *)sun, m, 122 control) == 0) { 123 m_freem(control); 124 m_freem(m); 125 return (EINVAL); 126 } else { 127 sorwakeup(so2); 128 return (0); 129 } 130 } 131 132 void 133 unp_setsockaddr(unp, nam) 134 struct unpcb *unp; 135 struct mbuf *nam; 136 { 137 struct sockaddr_un *sun; 138 139 if (unp->unp_addr) 140 sun = unp->unp_addr; 141 else 142 sun = &sun_noname; 143 nam->m_len = sun->sun_len; 144 if (nam->m_len > MLEN) 145 MEXTMALLOC(nam, nam->m_len, M_WAITOK); 146 memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len); 147 } 148 149 void 150 unp_setpeeraddr(unp, nam) 151 struct unpcb *unp; 152 struct mbuf *nam; 153 { 154 struct sockaddr_un *sun; 155 156 if (unp->unp_conn && unp->unp_conn->unp_addr) 157 sun = unp->unp_conn->unp_addr; 158 else 159 sun = &sun_noname; 160 nam->m_len = sun->sun_len; 161 if (nam->m_len > MLEN) 162 MEXTMALLOC(nam, nam->m_len, M_WAITOK); 163 memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len); 164 } 165 166 /*ARGSUSED*/ 167 int 168 uipc_usrreq(so, req, m, nam, control, p) 169 struct socket *so; 170 int req; 171 struct mbuf *m, *nam, *control; 172 struct proc *p; 173 { 174 struct unpcb *unp = sotounpcb(so); 175 struct socket *so2; 176 int error = 0; 177 178 if (req == PRU_CONTROL) 179 return (EOPNOTSUPP); 180 181 #ifdef DIAGNOSTIC 182 if (req != PRU_SEND && req != PRU_SENDOOB && control) 183 panic("uipc_usrreq: unexpected control mbuf"); 184 #endif 185 if (unp == 0 && req != PRU_ATTACH) { 186 error = EINVAL; 187 goto release; 188 } 189 190 switch (req) { 191 192 case PRU_ATTACH: 193 if (unp != 0) { 194 error = EISCONN; 195 break; 196 } 197 error = unp_attach(so); 198 break; 199 200 case PRU_DETACH: 201 unp_detach(unp); 202 break; 203 204 case PRU_BIND: 205 error = unp_bind(unp, nam, p); 206 break; 207 208 case PRU_LISTEN: 209 if (unp->unp_vnode == 0) 210 error = EINVAL; 211 break; 212 213 case PRU_CONNECT: 214 error = unp_connect(so, nam, p); 215 break; 216 217 case PRU_CONNECT2: 218 error = unp_connect2(so, (struct socket *)nam); 219 break; 220 221 case PRU_DISCONNECT: 222 unp_disconnect(unp); 223 break; 224 225 case PRU_ACCEPT: 226 unp_setpeeraddr(unp, nam); 227 break; 228 229 case PRU_SHUTDOWN: 230 socantsendmore(so); 231 unp_shutdown(unp); 232 break; 233 234 case PRU_RCVD: 235 switch (so->so_type) { 236 237 case SOCK_DGRAM: 238 panic("uipc 1"); 239 /*NOTREACHED*/ 240 241 case SOCK_STREAM: 242 #define rcv (&so->so_rcv) 243 #define snd (&so2->so_snd) 244 if (unp->unp_conn == 0) 245 break; 246 so2 = unp->unp_conn->unp_socket; 247 /* 248 * Adjust backpressure on sender 249 * and wakeup any waiting to write. 250 */ 251 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; 252 unp->unp_mbcnt = rcv->sb_mbcnt; 253 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; 254 unp->unp_cc = rcv->sb_cc; 255 sowwakeup(so2); 256 #undef snd 257 #undef rcv 258 break; 259 260 default: 261 panic("uipc 2"); 262 } 263 break; 264 265 case PRU_SEND: 266 /* 267 * Note: unp_internalize() rejects any control message 268 * other than SCM_RIGHTS, and only allows one. This 269 * has the side-effect of preventing a caller from 270 * forging SCM_CREDS. 271 */ 272 if (control && (error = unp_internalize(control, p))) 273 break; 274 switch (so->so_type) { 275 276 case SOCK_DGRAM: { 277 if (nam) { 278 if ((so->so_state & SS_ISCONNECTED) != 0) { 279 error = EISCONN; 280 goto die; 281 } 282 error = unp_connect(so, nam, p); 283 if (error) { 284 die: 285 m_freem(control); 286 m_freem(m); 287 break; 288 } 289 } else { 290 if ((so->so_state & SS_ISCONNECTED) == 0) { 291 error = ENOTCONN; 292 goto die; 293 } 294 } 295 error = unp_output(m, control, unp, p); 296 if (nam) 297 unp_disconnect(unp); 298 break; 299 } 300 301 case SOCK_STREAM: 302 #define rcv (&so2->so_rcv) 303 #define snd (&so->so_snd) 304 if (unp->unp_conn == 0) 305 panic("uipc 3"); 306 so2 = unp->unp_conn->unp_socket; 307 if (unp->unp_conn->unp_flags & UNP_WANTCRED) { 308 /* 309 * Credentials are passed only once on 310 * SOCK_STREAM. 311 */ 312 unp->unp_conn->unp_flags &= ~UNP_WANTCRED; 313 control = unp_addsockcred(p, control); 314 } 315 /* 316 * Send to paired receive port, and then reduce 317 * send buffer hiwater marks to maintain backpressure. 318 * Wake up readers. 319 */ 320 if (control) { 321 if (sbappendcontrol(rcv, m, control) == 0) 322 m_freem(control); 323 } else 324 sbappend(rcv, m); 325 snd->sb_mbmax -= 326 rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; 327 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; 328 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; 329 unp->unp_conn->unp_cc = rcv->sb_cc; 330 sorwakeup(so2); 331 #undef snd 332 #undef rcv 333 break; 334 335 default: 336 panic("uipc 4"); 337 } 338 break; 339 340 case PRU_ABORT: 341 unp_drop(unp, ECONNABORTED); 342 343 #ifdef DIAGNOSTIC 344 if (so->so_pcb == 0) 345 panic("uipc 5: drop killed pcb"); 346 #endif 347 unp_detach(unp); 348 break; 349 350 case PRU_SENSE: 351 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 352 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { 353 so2 = unp->unp_conn->unp_socket; 354 ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc; 355 } 356 ((struct stat *) m)->st_dev = NODEV; 357 if (unp->unp_ino == 0) 358 unp->unp_ino = unp_ino++; 359 ((struct stat *) m)->st_atimespec = 360 ((struct stat *) m)->st_mtimespec = 361 ((struct stat *) m)->st_ctimespec = unp->unp_ctime; 362 ((struct stat *) m)->st_ino = unp->unp_ino; 363 return (0); 364 365 case PRU_RCVOOB: 366 error = EOPNOTSUPP; 367 break; 368 369 case PRU_SENDOOB: 370 m_freem(control); 371 m_freem(m); 372 error = EOPNOTSUPP; 373 break; 374 375 case PRU_SOCKADDR: 376 unp_setsockaddr(unp, nam); 377 break; 378 379 case PRU_PEERADDR: 380 unp_setpeeraddr(unp, nam); 381 break; 382 383 default: 384 panic("piusrreq"); 385 } 386 387 release: 388 return (error); 389 } 390 391 /* 392 * Unix domain socket option processing. 393 */ 394 int 395 uipc_ctloutput(op, so, level, optname, mp) 396 int op; 397 struct socket *so; 398 int level, optname; 399 struct mbuf **mp; 400 { 401 struct unpcb *unp = sotounpcb(so); 402 struct mbuf *m = *mp; 403 int optval = 0, error = 0; 404 405 if (level != 0) { 406 error = EINVAL; 407 if (op == PRCO_SETOPT && m) 408 (void) m_free(m); 409 } else switch (op) { 410 411 case PRCO_SETOPT: 412 switch (optname) { 413 case LOCAL_CREDS: 414 if (m == NULL || m->m_len != sizeof(int)) 415 error = EINVAL; 416 else { 417 optval = *mtod(m, int *); 418 switch (optname) { 419 #define OPTSET(bit) \ 420 if (optval) \ 421 unp->unp_flags |= (bit); \ 422 else \ 423 unp->unp_flags &= ~(bit); 424 425 case LOCAL_CREDS: 426 OPTSET(UNP_WANTCRED); 427 break; 428 } 429 } 430 break; 431 #undef OPTSET 432 433 default: 434 error = ENOPROTOOPT; 435 break; 436 } 437 if (m) 438 (void) m_free(m); 439 break; 440 441 case PRCO_GETOPT: 442 switch (optname) { 443 case LOCAL_CREDS: 444 *mp = m = m_get(M_WAIT, MT_SOOPTS); 445 m->m_len = sizeof(int); 446 switch (optname) { 447 448 #define OPTBIT(bit) (unp->unp_flags & (bit) ? 1 : 0) 449 450 case LOCAL_CREDS: 451 optval = OPTBIT(UNP_WANTCRED); 452 break; 453 } 454 *mtod(m, int *) = optval; 455 break; 456 #undef OPTBIT 457 458 default: 459 error = ENOPROTOOPT; 460 break; 461 } 462 break; 463 } 464 return (error); 465 } 466 467 /* 468 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 469 * for stream sockets, although the total for sender and receiver is 470 * actually only PIPSIZ. 471 * Datagram sockets really use the sendspace as the maximum datagram size, 472 * and don't really want to reserve the sendspace. Their recvspace should 473 * be large enough for at least one max-size datagram plus address. 474 */ 475 #define PIPSIZ 4096 476 u_long unpst_sendspace = PIPSIZ; 477 u_long unpst_recvspace = PIPSIZ; 478 u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 479 u_long unpdg_recvspace = 4*1024; 480 481 int unp_rights; /* file descriptors in flight */ 482 483 int 484 unp_attach(so) 485 struct socket *so; 486 { 487 struct unpcb *unp; 488 struct timeval tv; 489 int error; 490 491 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 492 switch (so->so_type) { 493 494 case SOCK_STREAM: 495 error = soreserve(so, unpst_sendspace, unpst_recvspace); 496 break; 497 498 case SOCK_DGRAM: 499 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 500 break; 501 502 default: 503 panic("unp_attach"); 504 } 505 if (error) 506 return (error); 507 } 508 unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT); 509 if (unp == NULL) 510 return (ENOBUFS); 511 memset((caddr_t)unp, 0, sizeof(*unp)); 512 unp->unp_socket = so; 513 so->so_pcb = unp; 514 microtime(&tv); 515 TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime); 516 return (0); 517 } 518 519 void 520 unp_detach(unp) 521 struct unpcb *unp; 522 { 523 524 if (unp->unp_vnode) { 525 unp->unp_vnode->v_socket = 0; 526 vrele(unp->unp_vnode); 527 unp->unp_vnode = 0; 528 } 529 if (unp->unp_conn) 530 unp_disconnect(unp); 531 while (unp->unp_refs) 532 unp_drop(unp->unp_refs, ECONNRESET); 533 soisdisconnected(unp->unp_socket); 534 unp->unp_socket->so_pcb = 0; 535 if (unp->unp_addr) 536 free(unp->unp_addr, M_SONAME); 537 if (unp_rights) { 538 /* 539 * Normally the receive buffer is flushed later, 540 * in sofree, but if our receive buffer holds references 541 * to descriptors that are now garbage, we will dispose 542 * of those descriptor references after the garbage collector 543 * gets them (resulting in a "panic: closef: count < 0"). 544 */ 545 sorflush(unp->unp_socket); 546 free(unp, M_PCB); 547 unp_gc(); 548 } else 549 free(unp, M_PCB); 550 } 551 552 int 553 unp_bind(unp, nam, p) 554 struct unpcb *unp; 555 struct mbuf *nam; 556 struct proc *p; 557 { 558 struct sockaddr_un *sun; 559 struct vnode *vp; 560 struct vattr vattr; 561 size_t addrlen; 562 int error; 563 struct nameidata nd; 564 565 if (unp->unp_vnode != 0) 566 return (EINVAL); 567 568 /* 569 * Allocate the new sockaddr. We have to allocate one 570 * extra byte so that we can ensure that the pathname 571 * is nul-terminated. 572 */ 573 addrlen = nam->m_len + 1; 574 sun = malloc(addrlen, M_SONAME, M_WAITOK); 575 m_copydata(nam, 0, nam->m_len, (caddr_t)sun); 576 *(((char *)sun) + nam->m_len) = '\0'; 577 578 NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE, 579 sun->sun_path, p); 580 581 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 582 if ((error = namei(&nd)) != 0) 583 goto bad; 584 vp = nd.ni_vp; 585 if (vp != NULL) { 586 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 587 if (nd.ni_dvp == vp) 588 vrele(nd.ni_dvp); 589 else 590 vput(nd.ni_dvp); 591 vrele(vp); 592 error = EADDRINUSE; 593 goto bad; 594 } 595 VATTR_NULL(&vattr); 596 vattr.va_type = VSOCK; 597 vattr.va_mode = ACCESSPERMS; 598 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); 599 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 600 if (error) 601 goto bad; 602 vp = nd.ni_vp; 603 vp->v_socket = unp->unp_socket; 604 unp->unp_vnode = vp; 605 unp->unp_addrlen = addrlen; 606 unp->unp_addr = sun; 607 VOP_UNLOCK(vp, 0); 608 return (0); 609 610 bad: 611 free(sun, M_SONAME); 612 return (error); 613 } 614 615 int 616 unp_connect(so, nam, p) 617 struct socket *so; 618 struct mbuf *nam; 619 struct proc *p; 620 { 621 struct sockaddr_un *sun; 622 struct vnode *vp; 623 struct socket *so2, *so3; 624 struct unpcb *unp2, *unp3; 625 size_t addrlen; 626 int error; 627 struct nameidata nd; 628 629 /* 630 * Allocate a temporary sockaddr. We have to allocate one extra 631 * byte so that we can ensure that the pathname is nul-terminated. 632 * When we establish the connection, we copy the other PCB's 633 * sockaddr to our own. 634 */ 635 addrlen = nam->m_len + 1; 636 sun = malloc(addrlen, M_SONAME, M_WAITOK); 637 m_copydata(nam, 0, nam->m_len, (caddr_t)sun); 638 *(((char *)sun) + nam->m_len) = '\0'; 639 640 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, sun->sun_path, p); 641 642 if ((error = namei(&nd)) != 0) 643 goto bad2; 644 vp = nd.ni_vp; 645 if (vp->v_type != VSOCK) { 646 error = ENOTSOCK; 647 goto bad; 648 } 649 if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0) 650 goto bad; 651 so2 = vp->v_socket; 652 if (so2 == 0) { 653 error = ECONNREFUSED; 654 goto bad; 655 } 656 if (so->so_type != so2->so_type) { 657 error = EPROTOTYPE; 658 goto bad; 659 } 660 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 661 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 662 (so3 = sonewconn(so2, 0)) == 0) { 663 error = ECONNREFUSED; 664 goto bad; 665 } 666 unp2 = sotounpcb(so2); 667 unp3 = sotounpcb(so3); 668 if (unp2->unp_addr) { 669 unp3->unp_addr = malloc(unp2->unp_addrlen, 670 M_SONAME, M_WAITOK); 671 memcpy(unp3->unp_addr, unp2->unp_addr, 672 unp2->unp_addrlen); 673 unp3->unp_addrlen = unp2->unp_addrlen; 674 } 675 unp3->unp_flags = unp2->unp_flags; 676 so2 = so3; 677 } 678 error = unp_connect2(so, so2); 679 bad: 680 vput(vp); 681 bad2: 682 free(sun, M_SONAME); 683 return (error); 684 } 685 686 int 687 unp_connect2(so, so2) 688 struct socket *so; 689 struct socket *so2; 690 { 691 struct unpcb *unp = sotounpcb(so); 692 struct unpcb *unp2; 693 694 if (so2->so_type != so->so_type) 695 return (EPROTOTYPE); 696 unp2 = sotounpcb(so2); 697 unp->unp_conn = unp2; 698 switch (so->so_type) { 699 700 case SOCK_DGRAM: 701 unp->unp_nextref = unp2->unp_refs; 702 unp2->unp_refs = unp; 703 soisconnected(so); 704 break; 705 706 case SOCK_STREAM: 707 unp2->unp_conn = unp; 708 soisconnected(so); 709 soisconnected(so2); 710 break; 711 712 default: 713 panic("unp_connect2"); 714 } 715 return (0); 716 } 717 718 void 719 unp_disconnect(unp) 720 struct unpcb *unp; 721 { 722 struct unpcb *unp2 = unp->unp_conn; 723 724 if (unp2 == 0) 725 return; 726 unp->unp_conn = 0; 727 switch (unp->unp_socket->so_type) { 728 729 case SOCK_DGRAM: 730 if (unp2->unp_refs == unp) 731 unp2->unp_refs = unp->unp_nextref; 732 else { 733 unp2 = unp2->unp_refs; 734 for (;;) { 735 if (unp2 == 0) 736 panic("unp_disconnect"); 737 if (unp2->unp_nextref == unp) 738 break; 739 unp2 = unp2->unp_nextref; 740 } 741 unp2->unp_nextref = unp->unp_nextref; 742 } 743 unp->unp_nextref = 0; 744 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 745 break; 746 747 case SOCK_STREAM: 748 soisdisconnected(unp->unp_socket); 749 unp2->unp_conn = 0; 750 soisdisconnected(unp2->unp_socket); 751 break; 752 } 753 } 754 755 #ifdef notdef 756 unp_abort(unp) 757 struct unpcb *unp; 758 { 759 760 unp_detach(unp); 761 } 762 #endif 763 764 void 765 unp_shutdown(unp) 766 struct unpcb *unp; 767 { 768 struct socket *so; 769 770 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 771 (so = unp->unp_conn->unp_socket)) 772 socantrcvmore(so); 773 } 774 775 void 776 unp_drop(unp, errno) 777 struct unpcb *unp; 778 int errno; 779 { 780 struct socket *so = unp->unp_socket; 781 782 so->so_error = errno; 783 unp_disconnect(unp); 784 if (so->so_head) { 785 so->so_pcb = 0; 786 sofree(so); 787 if (unp->unp_addr) 788 free(unp->unp_addr, M_SONAME); 789 free(unp, M_PCB); 790 } 791 } 792 793 #ifdef notdef 794 unp_drain() 795 { 796 797 } 798 #endif 799 800 int 801 unp_externalize(rights) 802 struct mbuf *rights; 803 { 804 struct proc *p = curproc; /* XXX */ 805 struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 806 int i, *fdp; 807 struct file **rp; 808 struct file *fp; 809 int nfds, error = 0; 810 811 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / 812 sizeof(struct file *); 813 rp = (struct file **)CMSG_DATA(cm); 814 815 fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK); 816 817 /* Make sure the recipient should be able to see the descriptors.. */ 818 if (p->p_cwdi->cwdi_rdir != NULL) { 819 rp = (struct file **)CMSG_DATA(cm); 820 for (i = 0; i < nfds; i++) { 821 fp = *rp++; 822 /* 823 * If we are in a chroot'ed directory, and 824 * someone wants to pass us a directory, make 825 * sure it's inside the subtree we're allowed 826 * to access. 827 */ 828 if (fp->f_type == DTYPE_VNODE) { 829 struct vnode *vp = (struct vnode *)fp->f_data; 830 if ((vp->v_type == VDIR) && 831 !vn_isunder(vp, p->p_cwdi->cwdi_rdir, p)) { 832 error = EPERM; 833 break; 834 } 835 } 836 } 837 } 838 839 restart: 840 rp = (struct file **)CMSG_DATA(cm); 841 if (error != 0) { 842 for (i = 0; i < nfds; i++) { 843 fp = *rp; 844 /* 845 * zero the pointer before calling unp_discard, 846 * since it may end up in unp_gc().. 847 */ 848 *rp++ = 0; 849 unp_discard(fp); 850 } 851 goto out; 852 } 853 854 /* 855 * First loop -- allocate file descriptor table slots for the 856 * new descriptors. 857 */ 858 for (i = 0; i < nfds; i++) { 859 fp = *rp++; 860 if ((error = fdalloc(p, 0, &fdp[i])) != 0) { 861 /* 862 * Back out what we've done so far. 863 */ 864 for (--i; i >= 0; i--) 865 fdremove(p->p_fd, fdp[i]); 866 867 if (error == ENOSPC) { 868 fdexpand(p); 869 error = 0; 870 } else { 871 /* 872 * This is the error that has historically 873 * been returned, and some callers may 874 * expect it. 875 */ 876 error = EMSGSIZE; 877 } 878 goto restart; 879 } 880 881 /* 882 * Make the slot reference the descriptor so that 883 * fdalloc() works properly.. We finalize it all 884 * in the loop below. 885 */ 886 p->p_fd->fd_ofiles[fdp[i]] = fp; 887 } 888 889 /* 890 * Now that adding them has succeeded, update all of the 891 * descriptor passing state. 892 */ 893 rp = (struct file **)CMSG_DATA(cm); 894 for (i = 0; i < nfds; i++) { 895 fp = *rp++; 896 fp->f_msgcount--; 897 unp_rights--; 898 } 899 900 /* 901 * Copy temporary array to message and adjust length, in case of 902 * transition from large struct file pointers to ints. 903 */ 904 memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int)); 905 cm->cmsg_len = CMSG_LEN(nfds * sizeof(int)); 906 rights->m_len = CMSG_SPACE(nfds * sizeof(int)); 907 out: 908 free(fdp, M_TEMP); 909 return (error); 910 } 911 912 int 913 unp_internalize(control, p) 914 struct mbuf *control; 915 struct proc *p; 916 { 917 struct filedesc *fdescp = p->p_fd; 918 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 919 struct file **rp; 920 struct file *fp; 921 int i, fd, *fdp; 922 int nfds; 923 u_int neededspace; 924 925 /* Sanity check the control message header */ 926 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || 927 cm->cmsg_len != control->m_len) 928 return (EINVAL); 929 930 /* Verify that the file descriptors are valid */ 931 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int); 932 fdp = (int *)CMSG_DATA(cm); 933 for (i = 0; i < nfds; i++) { 934 fd = *fdp++; 935 if (fd_getfile(fdescp, fd) == NULL) 936 return (EBADF); 937 } 938 939 /* Make sure we have room for the struct file pointers */ 940 morespace: 941 neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) - 942 control->m_len; 943 if (neededspace > M_TRAILINGSPACE(control)) { 944 945 /* if we already have a cluster, the message is just too big */ 946 if (control->m_flags & M_EXT) 947 return (E2BIG); 948 949 /* allocate a cluster and try again */ 950 MCLGET(control, M_WAIT); 951 if ((control->m_flags & M_EXT) == 0) 952 return (ENOBUFS); /* allocation failed */ 953 954 /* copy the data to the cluster */ 955 memcpy(mtod(control, char *), cm, cm->cmsg_len); 956 cm = mtod(control, struct cmsghdr *); 957 goto morespace; 958 } 959 960 /* adjust message & mbuf to note amount of space actually used. */ 961 cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *)); 962 control->m_len = CMSG_SPACE(nfds * sizeof(struct file *)); 963 964 /* 965 * Transform the file descriptors into struct file pointers, in 966 * reverse order so that if pointers are bigger than ints, the 967 * int won't get until we're done. 968 */ 969 fdp = ((int *)CMSG_DATA(cm)) + nfds - 1; 970 rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1; 971 for (i = 0; i < nfds; i++) { 972 fp = fdescp->fd_ofiles[*fdp--]; 973 FILE_USE(fp); 974 *rp-- = fp; 975 fp->f_count++; 976 fp->f_msgcount++; 977 FILE_UNUSE(fp, NULL); 978 unp_rights++; 979 } 980 return (0); 981 } 982 983 struct mbuf * 984 unp_addsockcred(p, control) 985 struct proc *p; 986 struct mbuf *control; 987 { 988 struct cmsghdr *cmp; 989 struct sockcred *sc; 990 struct mbuf *m, *n; 991 int len, space, i; 992 993 len = CMSG_LEN(SOCKCREDSIZE(p->p_ucred->cr_ngroups)); 994 space = CMSG_SPACE(SOCKCREDSIZE(p->p_ucred->cr_ngroups)); 995 996 m = m_get(M_WAIT, MT_CONTROL); 997 if (space > MLEN) { 998 if (space > MCLBYTES) 999 MEXTMALLOC(m, space, M_WAITOK); 1000 else 1001 MCLGET(m, M_WAIT); 1002 if ((m->m_flags & M_EXT) == 0) { 1003 m_free(m); 1004 return (control); 1005 } 1006 } 1007 1008 m->m_len = space; 1009 m->m_next = NULL; 1010 cmp = mtod(m, struct cmsghdr *); 1011 sc = (struct sockcred *)CMSG_DATA(cmp); 1012 cmp->cmsg_len = len; 1013 cmp->cmsg_level = SOL_SOCKET; 1014 cmp->cmsg_type = SCM_CREDS; 1015 sc->sc_uid = p->p_cred->p_ruid; 1016 sc->sc_euid = p->p_ucred->cr_uid; 1017 sc->sc_gid = p->p_cred->p_rgid; 1018 sc->sc_egid = p->p_ucred->cr_gid; 1019 sc->sc_ngroups = p->p_ucred->cr_ngroups; 1020 for (i = 0; i < sc->sc_ngroups; i++) 1021 sc->sc_groups[i] = p->p_ucred->cr_groups[i]; 1022 1023 /* 1024 * If a control message already exists, append us to the end. 1025 */ 1026 if (control != NULL) { 1027 for (n = control; n->m_next != NULL; n = n->m_next) 1028 ; 1029 n->m_next = m; 1030 } else 1031 control = m; 1032 1033 return (control); 1034 } 1035 1036 int unp_defer, unp_gcing; 1037 extern struct domain unixdomain; 1038 1039 /* 1040 * Comment added long after the fact explaining what's going on here. 1041 * Do a mark-sweep GC of file descriptors on the system, to free up 1042 * any which are caught in flight to an about-to-be-closed socket. 1043 * 1044 * Traditional mark-sweep gc's start at the "root", and mark 1045 * everything reachable from the root (which, in our case would be the 1046 * process table). The mark bits are cleared during the sweep. 1047 * 1048 * XXX For some inexplicable reason (perhaps because the file 1049 * descriptor tables used to live in the u area which could be swapped 1050 * out and thus hard to reach), we do multiple scans over the set of 1051 * descriptors, using use *two* mark bits per object (DEFER and MARK). 1052 * Whenever we find a descriptor which references other descriptors, 1053 * the ones it references are marked with both bits, and we iterate 1054 * over the whole file table until there are no more DEFER bits set. 1055 * We also make an extra pass *before* the GC to clear the mark bits, 1056 * which could have been cleared at almost no cost during the previous 1057 * sweep. 1058 * 1059 * XXX MP: this needs to run with locks such that no other thread of 1060 * control can create or destroy references to file descriptors. it 1061 * may be necessary to defer the GC until later (when the locking 1062 * situation is more hospitable); it may be necessary to push this 1063 * into a separate thread. 1064 */ 1065 void 1066 unp_gc() 1067 { 1068 struct file *fp, *nextfp; 1069 struct socket *so, *so1; 1070 struct file **extra_ref, **fpp; 1071 int nunref, i; 1072 1073 if (unp_gcing) 1074 return; 1075 unp_gcing = 1; 1076 unp_defer = 0; 1077 1078 /* Clear mark bits */ 1079 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) 1080 fp->f_flag &= ~(FMARK|FDEFER); 1081 1082 /* 1083 * Iterate over the set of descriptors, marking ones believed 1084 * (based on refcount) to be referenced from a process, and 1085 * marking for rescan descriptors which are queued on a socket. 1086 */ 1087 do { 1088 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) { 1089 if (fp->f_flag & FDEFER) { 1090 fp->f_flag &= ~FDEFER; 1091 unp_defer--; 1092 #ifdef DIAGNOSTIC 1093 if (fp->f_count == 0) 1094 panic("unp_gc: deferred unreferenced socket"); 1095 #endif 1096 } else { 1097 if (fp->f_count == 0) 1098 continue; 1099 if (fp->f_flag & FMARK) 1100 continue; 1101 if (fp->f_count == fp->f_msgcount) 1102 continue; 1103 } 1104 fp->f_flag |= FMARK; 1105 1106 if (fp->f_type != DTYPE_SOCKET || 1107 (so = (struct socket *)fp->f_data) == 0) 1108 continue; 1109 if (so->so_proto->pr_domain != &unixdomain || 1110 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 1111 continue; 1112 #ifdef notdef 1113 if (so->so_rcv.sb_flags & SB_LOCK) { 1114 /* 1115 * This is problematical; it's not clear 1116 * we need to wait for the sockbuf to be 1117 * unlocked (on a uniprocessor, at least), 1118 * and it's also not clear what to do 1119 * if sbwait returns an error due to receipt 1120 * of a signal. If sbwait does return 1121 * an error, we'll go into an infinite 1122 * loop. Delete all of this for now. 1123 */ 1124 (void) sbwait(&so->so_rcv); 1125 goto restart; 1126 } 1127 #endif 1128 unp_scan(so->so_rcv.sb_mb, unp_mark, 0); 1129 /* 1130 * mark descriptors referenced from sockets queued on the accept queue as well. 1131 */ 1132 if (so->so_options & SO_ACCEPTCONN) { 1133 for (so1 = so->so_q0.tqh_first; 1134 so1 != 0; 1135 so1 = so1->so_qe.tqe_next) { 1136 unp_scan(so1->so_rcv.sb_mb, unp_mark, 0); 1137 } 1138 for (so1 = so->so_q.tqh_first; 1139 so1 != 0; 1140 so1 = so1->so_qe.tqe_next) { 1141 unp_scan(so1->so_rcv.sb_mb, unp_mark, 0); 1142 } 1143 } 1144 1145 } 1146 } while (unp_defer); 1147 /* 1148 * Sweep pass. Find unmarked descriptors, and free them. 1149 * 1150 * We grab an extra reference to each of the file table entries 1151 * that are not otherwise accessible and then free the rights 1152 * that are stored in messages on them. 1153 * 1154 * The bug in the orginal code is a little tricky, so I'll describe 1155 * what's wrong with it here. 1156 * 1157 * It is incorrect to simply unp_discard each entry for f_msgcount 1158 * times -- consider the case of sockets A and B that contain 1159 * references to each other. On a last close of some other socket, 1160 * we trigger a gc since the number of outstanding rights (unp_rights) 1161 * is non-zero. If during the sweep phase the gc code un_discards, 1162 * we end up doing a (full) closef on the descriptor. A closef on A 1163 * results in the following chain. Closef calls soo_close, which 1164 * calls soclose. Soclose calls first (through the switch 1165 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1166 * returns because the previous instance had set unp_gcing, and 1167 * we return all the way back to soclose, which marks the socket 1168 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1169 * to free up the rights that are queued in messages on the socket A, 1170 * i.e., the reference on B. The sorflush calls via the dom_dispose 1171 * switch unp_dispose, which unp_scans with unp_discard. This second 1172 * instance of unp_discard just calls closef on B. 1173 * 1174 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1175 * which results in another closef on A. Unfortunately, A is already 1176 * being closed, and the descriptor has already been marked with 1177 * SS_NOFDREF, and soclose panics at this point. 1178 * 1179 * Here, we first take an extra reference to each inaccessible 1180 * descriptor. Then, if the inaccessible descriptor is a 1181 * socket, we call sorflush in case it is a Unix domain 1182 * socket. After we destroy all the rights carried in 1183 * messages, we do a last closef to get rid of our extra 1184 * reference. This is the last close, and the unp_detach etc 1185 * will shut down the socket. 1186 * 1187 * 91/09/19, bsy@cs.cmu.edu 1188 */ 1189 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); 1190 for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0; 1191 fp = nextfp) { 1192 nextfp = fp->f_list.le_next; 1193 if (fp->f_count == 0) 1194 continue; 1195 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { 1196 *fpp++ = fp; 1197 nunref++; 1198 fp->f_count++; 1199 } 1200 } 1201 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1202 fp = *fpp; 1203 FILE_USE(fp); 1204 if (fp->f_type == DTYPE_SOCKET) 1205 sorflush((struct socket *)fp->f_data); 1206 FILE_UNUSE(fp, NULL); 1207 } 1208 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1209 fp = *fpp; 1210 FILE_USE(fp); 1211 (void) closef(fp, (struct proc *)0); 1212 } 1213 free((caddr_t)extra_ref, M_FILE); 1214 unp_gcing = 0; 1215 } 1216 1217 void 1218 unp_dispose(m) 1219 struct mbuf *m; 1220 { 1221 1222 if (m) 1223 unp_scan(m, unp_discard, 1); 1224 } 1225 1226 void 1227 unp_scan(m0, op, discard) 1228 struct mbuf *m0; 1229 void (*op) __P((struct file *)); 1230 int discard; 1231 { 1232 struct mbuf *m; 1233 struct file **rp; 1234 struct cmsghdr *cm; 1235 int i; 1236 int qfds; 1237 1238 while (m0) { 1239 for (m = m0; m; m = m->m_next) { 1240 if (m->m_type == MT_CONTROL && 1241 m->m_len >= sizeof(*cm)) { 1242 cm = mtod(m, struct cmsghdr *); 1243 if (cm->cmsg_level != SOL_SOCKET || 1244 cm->cmsg_type != SCM_RIGHTS) 1245 continue; 1246 qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) 1247 / sizeof(struct file *); 1248 rp = (struct file **)CMSG_DATA(cm); 1249 for (i = 0; i < qfds; i++) { 1250 struct file *fp = *rp; 1251 if (discard) 1252 *rp = 0; 1253 (*op)(fp); 1254 rp++; 1255 } 1256 break; /* XXX, but saves time */ 1257 } 1258 } 1259 m0 = m0->m_act; 1260 } 1261 } 1262 1263 void 1264 unp_mark(fp) 1265 struct file *fp; 1266 { 1267 if (fp == NULL) 1268 return; 1269 1270 if (fp->f_flag & FMARK) 1271 return; 1272 1273 /* If we're already deferred, don't screw up the defer count */ 1274 if (fp->f_flag & FDEFER) 1275 return; 1276 1277 /* 1278 * Minimize the number of deferrals... Sockets are the only 1279 * type of descriptor which can hold references to another 1280 * descriptor, so just mark other descriptors, and defer 1281 * unmarked sockets for the next pass. 1282 */ 1283 if (fp->f_type == DTYPE_SOCKET) { 1284 unp_defer++; 1285 if (fp->f_count == 0) 1286 panic("unp_mark: queued unref"); 1287 fp->f_flag |= FDEFER; 1288 } else { 1289 fp->f_flag |= FMARK; 1290 } 1291 return; 1292 } 1293 1294 void 1295 unp_discard(fp) 1296 struct file *fp; 1297 { 1298 if (fp == NULL) 1299 return; 1300 FILE_USE(fp); 1301 fp->f_msgcount--; 1302 unp_rights--; 1303 (void) closef(fp, (struct proc *)0); 1304 } 1305